From d66248eb4cfa714fb12e59228d11ff04b0f35356 Mon Sep 17 00:00:00 2001 From: Rahim Kanji Date: Mon, 26 Jan 2026 22:00:57 +0500 Subject: [PATCH] add verbose logging for bulk embedding operations Add logs to track embedding generation progress: - Show batch size when collecting chunks for embedding - Display OpenAI API call details (endpoint, model, chunk count) - Confirm successful storage after each batch --- RAG_POC/rag_ingest.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/RAG_POC/rag_ingest.cpp b/RAG_POC/rag_ingest.cpp index 1ea4932d3..cbeab3382 100644 --- a/RAG_POC/rag_ingest.cpp +++ b/RAG_POC/rag_ingest.cpp @@ -884,6 +884,8 @@ struct OpenAIEmbeddingProvider : public EmbeddingProvider { if (!url.empty() && url.back() == '/') url.pop_back(); url += "/embeddings"; + std::cerr << " Calling OpenAI API: " << url << " (model=" << model << ", chunks=" << inputs.size() << ")\n"; + CURL* curl = curl_easy_init(); if (!curl) throw std::runtime_error("curl_easy_init failed"); @@ -1105,6 +1107,8 @@ static size_t flush_embedding_batch(std::vector& pending, MySQLDB& db) { if (pending.empty()) return 0; + std::cerr << " Generating embeddings for batch of " << pending.size() << " chunks...\n"; + std::vector inputs; inputs.reserve(pending.size()); for (const auto& p : pending) { @@ -1120,6 +1124,7 @@ static size_t flush_embedding_batch(std::vector& pending, size_t count = pending.size(); pending.clear(); + std::cerr << " Successfully stored " << count << " embeddings\n"; return count; }