From 8d022a0fcc3d1349fe7f409022a28eec865e3a32 Mon Sep 17 00:00:00 2001 From: Wazir Ahmed Date: Wed, 28 Jan 2026 14:07:27 +0530 Subject: [PATCH] RAG: Add 'model' request parameter for embedding service - `model` is a required parameter for embedding requests in the OpenAI API specification. - https://platform.openai.com/docs/api-reference/embeddings Signed-off-by: Wazir Ahmed --- include/GenAI_Thread.h | 1 + lib/GenAI_Thread.cpp | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/include/GenAI_Thread.h b/include/GenAI_Thread.h index 238365d52..2b9ddaad7 100644 --- a/include/GenAI_Thread.h +++ b/include/GenAI_Thread.h @@ -196,6 +196,7 @@ public: // Service endpoints char* genai_embedding_uri; ///< URI for embedding service (default: http://127.0.0.1:8013/embedding) char* genai_rerank_uri; ///< URI for reranking service (default: http://127.0.0.1:8012/rerank) + char* genai_embedding_model; ///< Embedding model name (default: empty) // Timeouts (in milliseconds) int genai_embedding_timeout_ms; ///< Timeout for embedding requests (default: 30000) diff --git a/lib/GenAI_Thread.cpp b/lib/GenAI_Thread.cpp index dc763fdbb..7955de15c 100644 --- a/lib/GenAI_Thread.cpp +++ b/lib/GenAI_Thread.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +40,7 @@ static const char* genai_thread_variables_names[] = { // Original GenAI variables "threads", "embedding_uri", + "embedding_model", "rerank_uri", "embedding_timeout_ms", "rerank_timeout_ms", @@ -156,6 +158,7 @@ GenAI_Threads_Handler::GenAI_Threads_Handler() { variables.genai_threads = 4; variables.genai_embedding_uri = strdup("http://127.0.0.1:8013/embedding"); variables.genai_rerank_uri = strdup("http://127.0.0.1:8012/rerank"); + variables.genai_embedding_model = strdup(""); variables.genai_embedding_timeout_ms = 30000; variables.genai_rerank_timeout_ms = 30000; @@ -212,6 +215,8 @@ GenAI_Threads_Handler::~GenAI_Threads_Handler() { free(variables.genai_embedding_uri); if 
(variables.genai_rerank_uri) free(variables.genai_rerank_uri); + if (variables.genai_embedding_model) + free(variables.genai_embedding_model); // Free LLM bridge string variables if (variables.genai_llm_provider) @@ -376,6 +381,9 @@ char* GenAI_Threads_Handler::get_variable(char* name) { if (!strcmp(name, "rerank_uri")) { return strdup(variables.genai_rerank_uri ? variables.genai_rerank_uri : ""); } + if (!strcmp(name, "embedding_model")) { + return strdup(variables.genai_embedding_model ? variables.genai_embedding_model : ""); + } if (!strcmp(name, "embedding_timeout_ms")) { char buf[64]; snprintf(buf, sizeof(buf), "%d", variables.genai_embedding_timeout_ms); @@ -529,6 +537,12 @@ bool GenAI_Threads_Handler::set_variable(char* name, const char* value) { variables.genai_rerank_uri = strdup(value); return true; } + if (!strcmp(name, "embedding_model")) { + if (variables.genai_embedding_model) + free(variables.genai_embedding_model); + variables.genai_embedding_model = strdup(value); + return true; + } if (!strcmp(name, "embedding_timeout_ms")) { int val = atoi(value); if (val < 100 || val > 300000) { @@ -935,6 +949,7 @@ GenAI_EmbeddingResult GenAI_Threads_Handler::call_llama_batch_embedding(const st // Build JSON request using nlohmann/json json payload; payload["input"] = texts; + payload["model"] = std::string(variables.genai_embedding_model); std::string json_str = payload.dump(); // Configure curl