Ollama4j
A Java library (wrapper/binding) for Ollama server.
Loading...
Searching...
No Matches
Ollama.java
Go to the documentation of this file.
1/*
2 * Ollama4j - Java library for interacting with Ollama server.
3 * Copyright (c) 2025 Amith Koujalgi and contributors.
4 *
5 * Licensed under the MIT License (the "License");
6 * you may not use this file except in compliance with the License.
7 *
8*/
9package io.github.ollama4j;
10
11import com.fasterxml.jackson.databind.ObjectMapper;
12import io.github.ollama4j.exceptions.OllamaException;
13import io.github.ollama4j.exceptions.RoleNotFoundException;
14import io.github.ollama4j.exceptions.ToolInvocationException;
15import io.github.ollama4j.metrics.MetricsRecorder;
16import io.github.ollama4j.models.chat.*;
17import io.github.ollama4j.models.chat.OllamaChatTokenHandler;
18import io.github.ollama4j.models.embed.OllamaEmbedRequest;
19import io.github.ollama4j.models.embed.OllamaEmbedResult;
20import io.github.ollama4j.models.generate.OllamaGenerateRequest;
21import io.github.ollama4j.models.generate.OllamaGenerateStreamObserver;
22import io.github.ollama4j.models.generate.OllamaGenerateTokenHandler;
23import io.github.ollama4j.models.ps.ModelProcessesResult;
24import io.github.ollama4j.models.request.*;
25import io.github.ollama4j.models.response.*;
26import io.github.ollama4j.tools.*;
27import io.github.ollama4j.tools.annotations.OllamaToolService;
28import io.github.ollama4j.tools.annotations.ToolProperty;
29import io.github.ollama4j.tools.annotations.ToolSpec;
30import io.github.ollama4j.utils.Constants;
31import io.github.ollama4j.utils.Utils;
32import java.io.*;
33import java.lang.reflect.InvocationTargetException;
34import java.lang.reflect.Method;
35import java.lang.reflect.Parameter;
36import java.net.URI;
37import java.net.URISyntaxException;
38import java.net.http.HttpClient;
39import java.net.http.HttpRequest;
40import java.net.http.HttpResponse;
41import java.nio.charset.StandardCharsets;
42import java.nio.file.Files;
43import java.time.Duration;
44import java.util.*;
45import java.util.stream.Collectors;
46import lombok.Setter;
47import org.slf4j.Logger;
48import org.slf4j.LoggerFactory;
49
55@SuppressWarnings({"DuplicatedCode", "resource", "SpellCheckingInspection"})
56public class Ollama {
57
/** Class-wide SLF4J logger. */
58 private static final Logger LOG = LoggerFactory.getLogger(Ollama.class);
59
/** Base URL of the Ollama server; every endpoint path is appended to it. Assigned only in the constructors. */
60 private final String host;
/** Credentials used for the Authorization header; stays null until setBasicAuth/setBearerAuth is called. */
61 private Auth auth;
62
/** Tools registered on this client; chat() copies them into a request when the request asks for tools. */
63 private final ToolRegistry toolRegistry = new ToolRegistry();
64
/** Per-request timeout in seconds, applied by getRequestBuilderDefault and handed to the endpoint callers. */
71 @Setter private long requestTimeoutSeconds = 10;
72
/** NOTE(review): not read anywhere in the visible part of this class — presumably the read timeout for image URL downloads; confirm. */
74 @Setter private int imageURLReadTimeoutSeconds = 10;
75
/** NOTE(review): not read anywhere in the visible part of this class — presumably the connect timeout for image URL downloads; confirm. */
77 @Setter private int imageURLConnectTimeoutSeconds = 10;
78
/** Upper bound on how many tool-call rounds chat() performs before returning the latest result. */
85 @Setter private int maxChatToolCallRetries = 3;
86
/** Number of attempts pullModel() makes; 0 means a single attempt with no retry/backoff handling. */
95 @Setter
96 @SuppressWarnings({"FieldMayBeFinal", "FieldCanBeLocal"})
97 private int numberOfRetriesForModelPull = 0;
98
/** NOTE(review): not read anywhere in the visible part of this class — presumably consulted by the metrics recorder; confirm. */
105 @Setter private boolean metricsEnabled = false;
106
110 public Ollama() {
111 this.host = "http://localhost:11434";
112 }
113
/**
 * Creates a client bound to the given server URL.
 *
 * @param host base URL of the Ollama server; a single trailing {@code /} is removed
 */
public Ollama(String host) {
    // Strip one trailing slash so endpoint paths ("/api/...") can be appended directly.
    this.host = host.endsWith("/") ? host.substring(0, host.length() - 1) : host;
    LOG.info("Ollama4j client initialized. Connected to Ollama server at: {}", this.host);
}
127
134 public void setBasicAuth(String username, String password) {
135 this.auth = new BasicAuth(username, password);
136 }
137
143 public void setBearerAuth(String bearerToken) {
144 this.auth = new BearerAuth(bearerToken);
145 }
146
153 public boolean ping() throws OllamaException {
154 long startTime = System.currentTimeMillis();
155 String url = "/api/tags";
156 int statusCode = -1;
157 Object out = null;
158 try {
159 HttpClient httpClient = HttpClient.newHttpClient();
160 HttpRequest httpRequest;
161 HttpResponse<String> response;
162 httpRequest =
163 getRequestBuilderDefault(new URI(this.host + url))
164 .header(
165 Constants.HttpConstants.HEADER_KEY_ACCEPT,
166 Constants.HttpConstants.APPLICATION_JSON)
167 .header(
168 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
169 Constants.HttpConstants.APPLICATION_JSON)
170 .GET()
171 .build();
172 response = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString());
173 statusCode = response.statusCode();
174 return statusCode == 200;
175 } catch (InterruptedException ie) {
176 Thread.currentThread().interrupt();
177 throw new OllamaException("Ping interrupted", ie);
178 } catch (Exception e) {
179 throw new OllamaException("Ping failed", e);
180 } finally {
182 url, "", false, false, false, null, null, startTime, statusCode, out);
183 }
184 }
185
193 long startTime = System.currentTimeMillis();
194 String url = "/api/ps";
195 int statusCode = -1;
196 Object out = null;
197 try {
198 HttpClient httpClient = HttpClient.newHttpClient();
199 HttpRequest httpRequest = null;
200 try {
201 httpRequest =
202 getRequestBuilderDefault(new URI(this.host + url))
203 .header(
204 Constants.HttpConstants.HEADER_KEY_ACCEPT,
205 Constants.HttpConstants.APPLICATION_JSON)
206 .header(
207 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
208 Constants.HttpConstants.APPLICATION_JSON)
209 .GET()
210 .build();
211 } catch (URISyntaxException e) {
212 throw new OllamaException(e.getMessage(), e);
213 }
214 HttpResponse<String> response = null;
215 response = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString());
216 statusCode = response.statusCode();
217 String responseString = response.body();
218 if (statusCode == 200) {
219 return Utils.getObjectMapper()
220 .readValue(responseString, ModelProcessesResult.class);
221 } else {
222 throw new OllamaException(statusCode + " - " + responseString);
223 }
224 } catch (InterruptedException ie) {
225 Thread.currentThread().interrupt();
226 throw new OllamaException("ps interrupted", ie);
227 } catch (Exception e) {
228 throw new OllamaException("ps failed", e);
229 } finally {
231 url, "", false, false, false, null, null, startTime, statusCode, out);
232 }
233 }
234
241 public List<Model> listModels() throws OllamaException {
242 long startTime = System.currentTimeMillis();
243 String url = "/api/tags";
244 int statusCode = -1;
245 Object out = null;
246 try {
247 HttpClient httpClient = HttpClient.newHttpClient();
248 HttpRequest httpRequest =
249 getRequestBuilderDefault(new URI(this.host + url))
250 .header(
251 Constants.HttpConstants.HEADER_KEY_ACCEPT,
252 Constants.HttpConstants.APPLICATION_JSON)
253 .header(
254 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
255 Constants.HttpConstants.APPLICATION_JSON)
256 .GET()
257 .build();
258 HttpResponse<String> response =
259 httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString());
260 statusCode = response.statusCode();
261 String responseString = response.body();
262 if (statusCode == 200) {
263 return Utils.getObjectMapper()
264 .readValue(responseString, ListModelsResponse.class)
265 .getModels();
266 } else {
267 throw new OllamaException(statusCode + " - " + responseString);
268 }
269 } catch (InterruptedException ie) {
270 Thread.currentThread().interrupt();
271 throw new OllamaException("listModels interrupted", ie);
272 } catch (Exception e) {
273 throw new OllamaException(e.getMessage(), e);
274 } finally {
276 url, "", false, false, false, null, null, startTime, statusCode, out);
277 }
278 }
279
289 private void handlePullRetry(
290 String modelName, int currentRetry, int maxRetries, long baseDelayMillis)
291 throws InterruptedException {
292 int attempt = currentRetry + 1;
293 if (attempt < maxRetries) {
294 long backoffMillis = baseDelayMillis * (1L << currentRetry);
295 LOG.error(
296 "Failed to pull model {}, retrying in {}s... (attempt {}/{})",
297 modelName,
298 backoffMillis / 1000,
299 attempt,
300 maxRetries);
301 try {
302 Thread.sleep(backoffMillis);
303 } catch (InterruptedException ie) {
304 Thread.currentThread().interrupt();
305 throw ie;
306 }
307 } else {
308 LOG.error(
309 "Failed to pull model {} after {} attempts, no more retries.",
310 modelName,
311 maxRetries);
312 }
313 }
314
321 private void doPullModel(String modelName) throws OllamaException {
322 long startTime = System.currentTimeMillis();
323 String url = "/api/pull";
324 int statusCode = -1;
325 Object out = null;
326 try {
327 String jsonData = new ModelRequest(modelName).toString();
328 HttpRequest request =
329 getRequestBuilderDefault(new URI(this.host + url))
330 .POST(HttpRequest.BodyPublishers.ofString(jsonData))
331 .header(
332 Constants.HttpConstants.HEADER_KEY_ACCEPT,
333 Constants.HttpConstants.APPLICATION_JSON)
334 .header(
335 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
336 Constants.HttpConstants.APPLICATION_JSON)
337 .build();
338 HttpClient client = HttpClient.newHttpClient();
339 HttpResponse<InputStream> response =
340 client.send(request, HttpResponse.BodyHandlers.ofInputStream());
341 statusCode = response.statusCode();
342 InputStream responseBodyStream = response.body();
343 String responseString = "";
344 boolean success = false; // Flag to check the pull success.
345
346 try (BufferedReader reader =
347 new BufferedReader(
348 new InputStreamReader(responseBodyStream, StandardCharsets.UTF_8))) {
349 String line;
350 while ((line = reader.readLine()) != null) {
351 ModelPullResponse modelPullResponse =
352 Utils.getObjectMapper().readValue(line, ModelPullResponse.class);
353 success = processModelPullResponse(modelPullResponse, modelName) || success;
354 }
355 }
356 if (!success) {
357 LOG.error("Model pull failed or returned invalid status.");
358 throw new OllamaException("Model pull failed or returned invalid status.");
359 }
360 if (statusCode != 200) {
361 throw new OllamaException(statusCode + " - " + responseString);
362 }
363 } catch (InterruptedException ie) {
364 Thread.currentThread().interrupt();
365 throw new OllamaException("Thread was interrupted during model pull.", ie);
366 } catch (Exception e) {
367 throw new OllamaException(e.getMessage(), e);
368 } finally {
370 url, "", false, false, false, null, null, startTime, statusCode, out);
371 }
372 }
373
383 @SuppressWarnings("RedundantIfStatement")
384 private boolean processModelPullResponse(ModelPullResponse modelPullResponse, String modelName)
385 throws OllamaException {
386 if (modelPullResponse == null) {
387 LOG.error("Received null response for model pull.");
388 return false;
389 }
390 String error = modelPullResponse.getError();
391 if (error != null && !error.trim().isEmpty()) {
392 throw new OllamaException("Model pull failed: " + error);
393 }
394 String status = modelPullResponse.getStatus();
395 if (status != null) {
396 LOG.debug("{}: {}", modelName, status);
397 if ("success".equalsIgnoreCase(status)) {
398 return true;
399 }
400 }
401 return false;
402 }
403
410 public String getVersion() throws OllamaException {
411 String url = "/api/version";
412 long startTime = System.currentTimeMillis();
413 int statusCode = -1;
414 Object out = null;
415 try {
416 HttpClient httpClient = HttpClient.newHttpClient();
417 HttpRequest httpRequest =
418 getRequestBuilderDefault(new URI(this.host + url))
419 .header(
420 Constants.HttpConstants.HEADER_KEY_ACCEPT,
421 Constants.HttpConstants.APPLICATION_JSON)
422 .header(
423 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
424 Constants.HttpConstants.APPLICATION_JSON)
425 .GET()
426 .build();
427 HttpResponse<String> response =
428 httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString());
429 statusCode = response.statusCode();
430 String responseString = response.body();
431 if (statusCode == 200) {
432 return Utils.getObjectMapper()
433 .readValue(responseString, OllamaVersion.class)
434 .getVersion();
435 } else {
436 throw new OllamaException(statusCode + " - " + responseString);
437 }
438 } catch (InterruptedException ie) {
439 Thread.currentThread().interrupt();
440 throw new OllamaException("Thread was interrupted", ie);
441 } catch (Exception e) {
442 throw new OllamaException(e.getMessage(), e);
443 } finally {
445 url, "", false, false, false, null, null, startTime, statusCode, out);
446 }
447 }
448
457 public void pullModel(String modelName) throws OllamaException {
458 try {
459 if (numberOfRetriesForModelPull == 0) {
460 this.doPullModel(modelName);
461 return;
462 }
463 int numberOfRetries = 0;
464 long baseDelayMillis = 3000L; // 3 seconds base delay
465 while (numberOfRetries < numberOfRetriesForModelPull) {
466 try {
467 this.doPullModel(modelName);
468 return;
469 } catch (OllamaException e) {
470 handlePullRetry(
471 modelName,
472 numberOfRetries,
473 numberOfRetriesForModelPull,
474 baseDelayMillis);
475 numberOfRetries++;
476 }
477 }
478 throw new OllamaException(
479 "Failed to pull model "
480 + modelName
481 + " after "
482 + numberOfRetriesForModelPull
483 + " retries");
484 } catch (InterruptedException ie) {
485 Thread.currentThread().interrupt();
486 throw new OllamaException("Thread was interrupted", ie);
487 } catch (Exception e) {
488 throw new OllamaException(e.getMessage(), e);
489 }
490 }
491
499 public ModelDetail getModelDetails(String modelName) throws OllamaException {
500 long startTime = System.currentTimeMillis();
501 String url = "/api/show";
502 int statusCode = -1;
503 Object out = null;
504 try {
505 String jsonData = new ModelRequest(modelName).toString();
506 HttpRequest request =
507 getRequestBuilderDefault(new URI(this.host + url))
508 .header(
509 Constants.HttpConstants.HEADER_KEY_ACCEPT,
510 Constants.HttpConstants.APPLICATION_JSON)
511 .header(
512 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
513 Constants.HttpConstants.APPLICATION_JSON)
514 .POST(HttpRequest.BodyPublishers.ofString(jsonData))
515 .build();
516 HttpClient client = HttpClient.newHttpClient();
517 HttpResponse<String> response =
518 client.send(request, HttpResponse.BodyHandlers.ofString());
519 statusCode = response.statusCode();
520 String responseBody = response.body();
521 if (statusCode == 200) {
522 return Utils.getObjectMapper().readValue(responseBody, ModelDetail.class);
523 } else {
524 throw new OllamaException(statusCode + " - " + responseBody);
525 }
526 } catch (InterruptedException ie) {
527 Thread.currentThread().interrupt();
528 throw new OllamaException("Thread was interrupted", ie);
529 } catch (Exception e) {
530 throw new OllamaException(e.getMessage(), e);
531 } finally {
533 url, "", false, false, false, null, null, startTime, statusCode, out);
534 }
535 }
536
544 public void createModel(CustomModelRequest customModelRequest) throws OllamaException {
545 long startTime = System.currentTimeMillis();
546 String url = "/api/create";
547 int statusCode = -1;
548 Object out = null;
549 try {
550 String jsonData = customModelRequest.toString();
551 HttpRequest request =
552 getRequestBuilderDefault(new URI(this.host + url))
553 .header(
554 Constants.HttpConstants.HEADER_KEY_ACCEPT,
555 Constants.HttpConstants.APPLICATION_JSON)
556 .header(
557 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
558 Constants.HttpConstants.APPLICATION_JSON)
559 .POST(
560 HttpRequest.BodyPublishers.ofString(
561 jsonData, StandardCharsets.UTF_8))
562 .build();
563 HttpClient client = HttpClient.newHttpClient();
564 HttpResponse<InputStream> response =
565 client.send(request, HttpResponse.BodyHandlers.ofInputStream());
566 statusCode = response.statusCode();
567 if (statusCode != 200) {
568 String errorBody =
569 new String(response.body().readAllBytes(), StandardCharsets.UTF_8);
570 out = errorBody;
571 throw new OllamaException(statusCode + " - " + errorBody);
572 }
573 try (BufferedReader reader =
574 new BufferedReader(
575 new InputStreamReader(response.body(), StandardCharsets.UTF_8))) {
576 String line;
577 StringBuilder lines = new StringBuilder();
578 while ((line = reader.readLine()) != null) {
580 Utils.getObjectMapper().readValue(line, ModelPullResponse.class);
581 lines.append(line);
582 LOG.debug(res.getStatus());
583 if (res.getError() != null) {
584 out = res.getError();
585 throw new OllamaException(res.getError());
586 }
587 }
588 out = lines;
589 }
590 } catch (InterruptedException e) {
591 Thread.currentThread().interrupt();
592 throw new OllamaException("Thread was interrupted", e);
593 } catch (Exception e) {
594 throw new OllamaException(e.getMessage(), e);
595 } finally {
597 url, "", false, false, false, null, null, startTime, statusCode, out);
598 }
599 }
600
608 public void deleteModel(String modelName, boolean ignoreIfNotPresent) throws OllamaException {
609 long startTime = System.currentTimeMillis();
610 String url = "/api/delete";
611 int statusCode = -1;
612 Object out = null;
613 try {
614 String jsonData = new ModelRequest(modelName).toString();
615 HttpRequest request =
616 getRequestBuilderDefault(new URI(this.host + url))
617 .method(
618 "DELETE",
619 HttpRequest.BodyPublishers.ofString(
620 jsonData, StandardCharsets.UTF_8))
621 .header(
622 Constants.HttpConstants.HEADER_KEY_ACCEPT,
623 Constants.HttpConstants.APPLICATION_JSON)
624 .header(
625 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
626 Constants.HttpConstants.APPLICATION_JSON)
627 .build();
628 HttpClient client = HttpClient.newHttpClient();
629 HttpResponse<String> response =
630 client.send(request, HttpResponse.BodyHandlers.ofString());
631 statusCode = response.statusCode();
632 String responseBody = response.body();
633 out = responseBody;
634 if (statusCode == 404
635 && responseBody.contains("model")
636 && responseBody.contains("not found")) {
637 return;
638 }
639 if (statusCode != 200) {
640 throw new OllamaException(statusCode + " - " + responseBody);
641 }
642 } catch (InterruptedException e) {
643 Thread.currentThread().interrupt();
644 throw new OllamaException("Thread was interrupted", e);
645 } catch (Exception e) {
646 throw new OllamaException(statusCode + " - " + out, e);
647 } finally {
649 url, "", false, false, false, null, null, startTime, statusCode, out);
650 }
651 }
652
662 public void unloadModel(String modelName) throws OllamaException {
663 long startTime = System.currentTimeMillis();
664 String url = "/api/generate";
665 int statusCode = -1;
666 Object out = null;
667 try {
668 ObjectMapper objectMapper = new ObjectMapper();
669 Map<String, Object> jsonMap = new java.util.HashMap<>();
670 jsonMap.put("model", modelName);
671 jsonMap.put("keep_alive", 0);
672 String jsonData = objectMapper.writeValueAsString(jsonMap);
673 HttpRequest request =
674 getRequestBuilderDefault(new URI(this.host + url))
675 .method(
676 "POST",
677 HttpRequest.BodyPublishers.ofString(
678 jsonData, StandardCharsets.UTF_8))
679 .header(
680 Constants.HttpConstants.HEADER_KEY_ACCEPT,
681 Constants.HttpConstants.APPLICATION_JSON)
682 .header(
683 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
684 Constants.HttpConstants.APPLICATION_JSON)
685 .build();
686 LOG.debug("Unloading model with request: {}", jsonData);
687 HttpClient client = HttpClient.newHttpClient();
688 HttpResponse<String> response =
689 client.send(request, HttpResponse.BodyHandlers.ofString());
690 statusCode = response.statusCode();
691 String responseBody = response.body();
692 if (statusCode == 404
693 && responseBody.contains("model")
694 && responseBody.contains("not found")) {
695 LOG.debug("Unload response: {} - {}", statusCode, responseBody);
696 return;
697 }
698 if (statusCode != 200) {
699 LOG.debug("Unload response: {} - {}", statusCode, responseBody);
700 throw new OllamaException(statusCode + " - " + responseBody);
701 }
702 } catch (InterruptedException e) {
703 Thread.currentThread().interrupt();
704 LOG.debug("Unload interrupted: {} - {}", statusCode, out);
705 throw new OllamaException(statusCode + " - " + out, e);
706 } catch (Exception e) {
707 LOG.debug("Unload failed: {} - {}", statusCode, out);
708 throw new OllamaException(statusCode + " - " + out, e);
709 } finally {
711 url, "", false, false, false, null, null, startTime, statusCode, out);
712 }
713 }
714
723 long startTime = System.currentTimeMillis();
724 String url = "/api/embed";
725 int statusCode = -1;
726 Object out = null;
727 try {
728 String jsonData = Utils.getObjectMapper().writeValueAsString(modelRequest);
729 HttpClient httpClient = HttpClient.newHttpClient();
730 HttpRequest request =
731 HttpRequest.newBuilder(new URI(this.host + url))
732 .header(
733 Constants.HttpConstants.HEADER_KEY_ACCEPT,
734 Constants.HttpConstants.APPLICATION_JSON)
735 .POST(HttpRequest.BodyPublishers.ofString(jsonData))
736 .build();
737 HttpResponse<String> response =
738 httpClient.send(request, HttpResponse.BodyHandlers.ofString());
739 statusCode = response.statusCode();
740 String responseBody = response.body();
741 if (statusCode == 200) {
742 return Utils.getObjectMapper().readValue(responseBody, OllamaEmbedResult.class);
743 } else {
744 throw new OllamaException(statusCode + " - " + responseBody);
745 }
746 } catch (InterruptedException e) {
747 Thread.currentThread().interrupt();
748 throw new OllamaException("Thread was interrupted", e);
749 } catch (Exception e) {
750 throw new OllamaException(e.getMessage(), e);
751 } finally {
753 url, "", false, false, false, null, null, startTime, statusCode, out);
754 }
755 }
756
768 throws OllamaException {
769 try {
770 if (request.isUseTools()) {
771 return generateWithToolsInternal(request, streamObserver);
772 }
773
774 if (streamObserver != null) {
775 if (request.isThink()) {
776 return generateSyncForOllamaRequestModel(
777 request,
778 streamObserver.getThinkingStreamHandler(),
779 streamObserver.getResponseStreamHandler());
780 } else {
781 return generateSyncForOllamaRequestModel(
782 request, null, streamObserver.getResponseStreamHandler());
783 }
784 }
785 return generateSyncForOllamaRequestModel(request, null, null);
786 } catch (Exception e) {
787 throw new OllamaException(e.getMessage(), e);
788 }
789 }
790
791 // (No javadoc for private helper, as is standard)
792 private OllamaResult generateWithToolsInternal(
794 throws OllamaException {
795 ArrayList<OllamaChatMessage> msgs = new ArrayList<>();
796 OllamaChatRequest chatRequest = new OllamaChatRequest();
797 chatRequest.setModel(request.getModel());
799 ocm.setRole(OllamaChatMessageRole.USER);
800 ocm.setResponse(request.getPrompt());
801 chatRequest.setMessages(msgs);
802 msgs.add(ocm);
803 OllamaChatTokenHandler hdlr = null;
804 chatRequest.setTools(request.getTools());
805 if (streamObserver != null) {
806 chatRequest.setStream(true);
807 if (streamObserver.getResponseStreamHandler() != null) {
808 hdlr =
809 chatResponseModel ->
810 streamObserver
811 .getResponseStreamHandler()
812 .accept(chatResponseModel.getMessage().getResponse());
813 }
814 }
815 OllamaChatResult res = chat(chatRequest, hdlr);
816 return new OllamaResult(
817 res.getResponseModel().getMessage().getResponse(),
818 res.getResponseModel().getMessage().getThinking(),
819 res.getResponseModel().getTotalDuration(),
820 -1);
821 }
822
834 String model, String prompt, boolean raw, boolean think) throws OllamaException {
835 long startTime = System.currentTimeMillis();
836 String url = "/api/generate";
837 int statusCode = -1;
838 try {
839 OllamaGenerateRequest ollamaRequestModel = new OllamaGenerateRequest(model, prompt);
840 ollamaRequestModel.setRaw(raw);
841 ollamaRequestModel.setThink(think);
842 OllamaAsyncResultStreamer ollamaAsyncResultStreamer =
844 getRequestBuilderDefault(new URI(this.host + url)),
845 ollamaRequestModel,
846 requestTimeoutSeconds);
847 ollamaAsyncResultStreamer.start();
848 statusCode = ollamaAsyncResultStreamer.getHttpStatusCode();
849 return ollamaAsyncResultStreamer;
850 } catch (Exception e) {
851 throw new OllamaException(e.getMessage(), e);
852 } finally {
854 url, model, raw, think, true, null, null, startTime, statusCode, null);
855 }
856 }
857
871 throws OllamaException {
872 try {
873 OllamaChatEndpointCaller requestCaller =
874 new OllamaChatEndpointCaller(host, auth, requestTimeoutSeconds);
875 OllamaChatResult result;
876
877 // only add tools if tools flag is set
878 if (request.isUseTools()) {
879 // add all registered tools to request
880 request.setTools(toolRegistry.getRegisteredTools());
881 }
882
883 if (tokenHandler != null) {
884 request.setStream(true);
885 result = requestCaller.call(request, tokenHandler);
886 } else {
887 result = requestCaller.callSync(request);
888 }
889
890 // check if toolCallIsWanted
891 List<OllamaChatToolCalls> toolCalls =
892 result.getResponseModel().getMessage().getToolCalls();
893 int toolCallTries = 0;
894 while (toolCalls != null
895 && !toolCalls.isEmpty()
896 && toolCallTries < maxChatToolCallRetries) {
897 for (OllamaChatToolCalls toolCall : toolCalls) {
898 String toolName = toolCall.getFunction().getName();
899 for (Tools.Tool t : request.getTools()) {
900 if (t.getToolSpec().getName().equals(toolName)) {
901 ToolFunction toolFunction = t.getToolFunction();
902 if (toolFunction == null) {
903 throw new ToolInvocationException(
904 "Tool function not found: " + toolName);
905 }
906 LOG.debug(
907 "Invoking tool {} with arguments: {}",
908 toolCall.getFunction().getName(),
909 toolCall.getFunction().getArguments());
910 Map<String, Object> arguments = toolCall.getFunction().getArguments();
911 Object res = toolFunction.apply(arguments);
912 String argumentKeys =
913 arguments.keySet().stream()
914 .map(Object::toString)
915 .collect(Collectors.joining(", "));
916 request.getMessages()
917 .add(
920 "[TOOL_RESULTS] "
921 + toolName
922 + "("
923 + argumentKeys
924 + "): "
925 + res
926 + " [/TOOL_RESULTS]"));
927 }
928 }
929 }
930 if (tokenHandler != null) {
931 result = requestCaller.call(request, tokenHandler);
932 } else {
933 result = requestCaller.callSync(request);
934 }
935 toolCalls = result.getResponseModel().getMessage().getToolCalls();
936 toolCallTries++;
937 }
938 return result;
939 } catch (InterruptedException e) {
940 Thread.currentThread().interrupt();
941 throw new OllamaException("Thread was interrupted", e);
942 } catch (Exception e) {
943 throw new OllamaException(e.getMessage(), e);
944 }
945 }
946
952 public void registerTool(Tools.Tool tool) {
953 toolRegistry.addTool(tool);
954 LOG.debug("Registered tool: {}", tool.getToolSpec().getName());
955 }
956
963 public void registerTools(List<Tools.Tool> tools) {
964 toolRegistry.addTools(tools);
965 }
966
971 public void deregisterTools() {
972 toolRegistry.clear();
973 LOG.debug("All tools have been deregistered.");
974 }
975
985 try {
986 Class<?> callerClass = null;
987 try {
988 callerClass =
989 Class.forName(Thread.currentThread().getStackTrace()[2].getClassName());
990 } catch (ClassNotFoundException e) {
991 throw new OllamaException(e.getMessage(), e);
992 }
993
994 OllamaToolService ollamaToolServiceAnnotation =
995 callerClass.getDeclaredAnnotation(OllamaToolService.class);
996 if (ollamaToolServiceAnnotation == null) {
997 throw new IllegalStateException(
998 callerClass + " is not annotated as " + OllamaToolService.class);
999 }
1000
1001 Class<?>[] providers = ollamaToolServiceAnnotation.providers();
1002 for (Class<?> provider : providers) {
1003 registerAnnotatedTools(provider.getDeclaredConstructor().newInstance());
1004 }
1005 } catch (InstantiationException
1006 | NoSuchMethodException
1007 | IllegalAccessException
1008 | InvocationTargetException e) {
1009 throw new OllamaException(e.getMessage());
1010 }
1011 }
1012
1022 public void registerAnnotatedTools(Object object) {
1023 Class<?> objectClass = object.getClass();
1024 Method[] methods = objectClass.getMethods();
1025 for (Method m : methods) {
1026 ToolSpec toolSpec = m.getDeclaredAnnotation(ToolSpec.class);
1027 if (toolSpec == null) {
1028 continue;
1029 }
1030 String operationName = !toolSpec.name().isBlank() ? toolSpec.name() : m.getName();
1031 String operationDesc = !toolSpec.desc().isBlank() ? toolSpec.desc() : operationName;
1032
1033 final Map<String, Tools.Property> params = new HashMap<String, Tools.Property>() {};
1034 LinkedHashMap<String, String> methodParams = new LinkedHashMap<>();
1035 for (Parameter parameter : m.getParameters()) {
1036 final ToolProperty toolPropertyAnn =
1037 parameter.getDeclaredAnnotation(ToolProperty.class);
1038 String propType = parameter.getType().getTypeName();
1039 if (toolPropertyAnn == null) {
1040 methodParams.put(parameter.getName(), null);
1041 continue;
1042 }
1043 String propName =
1044 !toolPropertyAnn.name().isBlank()
1045 ? toolPropertyAnn.name()
1046 : parameter.getName();
1047 methodParams.put(propName, propType);
1048 params.put(
1049 propName,
1050 Tools.Property.builder()
1051 .type(propType)
1052 .description(toolPropertyAnn.desc())
1053 .required(toolPropertyAnn.required())
1054 .build());
1055 }
1056 Tools.ToolSpec toolSpecification =
1057 Tools.ToolSpec.builder()
1058 .name(operationName)
1059 .description(operationDesc)
1060 .parameters(Tools.Parameters.of(params))
1061 .build();
1062 ReflectionalToolFunction reflectionalToolFunction =
1063 new ReflectionalToolFunction(object, m, methodParams);
1064 toolRegistry.addTool(
1065 Tools.Tool.builder()
1066 .toolFunction(reflectionalToolFunction)
1067 .toolSpec(toolSpecification)
1068 .build());
1069 }
1070 }
1071
1078 public OllamaChatMessageRole addCustomRole(String roleName) {
1079 return OllamaChatMessageRole.newCustomRole(roleName);
1080 }
1081
1087 public List<OllamaChatMessageRole> listRoles() {
1089 }
1090
1098 public OllamaChatMessageRole getRole(String roleName) throws RoleNotFoundException {
1099 return OllamaChatMessageRole.getRole(roleName);
1100 }
1101
1102 // technical private methods //
1103
1111 private static String encodeFileToBase64(File file) throws IOException {
1112 return Base64.getEncoder().encodeToString(Files.readAllBytes(file.toPath()));
1113 }
1114
1121 private static String encodeByteArrayToBase64(byte[] bytes) {
1122 return Base64.getEncoder().encodeToString(bytes);
1123 }
1124
1136 private OllamaResult generateSyncForOllamaRequestModel(
1137 OllamaGenerateRequest ollamaRequestModel,
1138 OllamaGenerateTokenHandler thinkingStreamHandler,
1139 OllamaGenerateTokenHandler responseStreamHandler)
1140 throws OllamaException {
1141 long startTime = System.currentTimeMillis();
1142 int statusCode = -1;
1143 Object out = null;
1144 try {
1145 OllamaGenerateEndpointCaller requestCaller =
1146 new OllamaGenerateEndpointCaller(host, auth, requestTimeoutSeconds);
1147 OllamaResult result;
1148 if (responseStreamHandler != null) {
1149 ollamaRequestModel.setStream(true);
1150 result =
1151 requestCaller.call(
1152 ollamaRequestModel, thinkingStreamHandler, responseStreamHandler);
1153 } else {
1154 result = requestCaller.callSync(ollamaRequestModel);
1155 }
1156 statusCode = result.getHttpStatusCode();
1157 out = result;
1158 return result;
1159 } catch (InterruptedException e) {
1160 Thread.currentThread().interrupt();
1161 throw new OllamaException("Thread was interrupted", e);
1162 } catch (Exception e) {
1163 throw new OllamaException(e.getMessage(), e);
1164 } finally {
1167 ollamaRequestModel.getModel(),
1168 ollamaRequestModel.isRaw(),
1169 ollamaRequestModel.isThink(),
1170 ollamaRequestModel.isStream(),
1171 ollamaRequestModel.getOptions(),
1172 ollamaRequestModel.getFormat(),
1173 startTime,
1174 statusCode,
1175 out);
1176 }
1177 }
1178
1185 private HttpRequest.Builder getRequestBuilderDefault(URI uri) {
1186 HttpRequest.Builder requestBuilder =
1187 HttpRequest.newBuilder(uri)
1188 .header(
1189 Constants.HttpConstants.HEADER_KEY_CONTENT_TYPE,
1190 Constants.HttpConstants.APPLICATION_JSON)
1191 .timeout(Duration.ofSeconds(requestTimeoutSeconds));
1192 if (isAuthSet()) {
1193 requestBuilder.header("Authorization", auth.getAuthHeaderValue());
1194 }
1195 return requestBuilder;
1196 }
1197
1203 private boolean isAuthSet() {
1204 return auth != null;
1205 }
1206}
void setBasicAuth(String username, String password)
Definition Ollama.java:134
OllamaChatMessageRole getRole(String roleName)
Definition Ollama.java:1098
OllamaEmbedResult embed(OllamaEmbedRequest modelRequest)
Definition Ollama.java:722
OllamaResult generate(OllamaGenerateRequest request, OllamaGenerateStreamObserver streamObserver)
Definition Ollama.java:766
void unloadModel(String modelName)
Definition Ollama.java:662
void registerTool(Tools.Tool tool)
Definition Ollama.java:952
void pullModel(String modelName)
Definition Ollama.java:457
OllamaChatResult chat(OllamaChatRequest request, OllamaChatTokenHandler tokenHandler)
Definition Ollama.java:870
List< Model > listModels()
Definition Ollama.java:241
List< OllamaChatMessageRole > listRoles()
Definition Ollama.java:1087
OllamaChatMessageRole addCustomRole(String roleName)
Definition Ollama.java:1078
OllamaAsyncResultStreamer generateAsync(String model, String prompt, boolean raw, boolean think)
Definition Ollama.java:833
void setBearerAuth(String bearerToken)
Definition Ollama.java:143
ModelProcessesResult ps()
Definition Ollama.java:192
void createModel(CustomModelRequest customModelRequest)
Definition Ollama.java:544
void registerAnnotatedTools(Object object)
Definition Ollama.java:1022
void registerTools(List< Tools.Tool > tools)
Definition Ollama.java:963
void deleteModel(String modelName, boolean ignoreIfNotPresent)
Definition Ollama.java:608
ModelDetail getModelDetails(String modelName)
Definition Ollama.java:499
static void record(String endpoint, String model, boolean raw, boolean thinking, boolean streaming, Map< String, Object > options, Object format, long startTime, int responseHttpStatus, Object response)
static OllamaChatMessageRole newCustomRole(String roleName)
static OllamaChatMessageRole getRole(String roleName)
OllamaChatResult call(OllamaChatRequest body, OllamaChatTokenHandler tokenHandler)
OllamaResult call(OllamaRequestBody body, OllamaGenerateTokenHandler thinkingStreamHandler, OllamaGenerateTokenHandler responseStreamHandler)
void addTools(List< Tools.Tool > tools)
static ObjectMapper getObjectMapper()
Definition Utils.java:32
Object apply(Map< String, Object > arguments)