From 02f2ff5e2c72f9572cd7d8d313f11485f8b6ff48 Mon Sep 17 00:00:00 2001 From: Rahim Kanji Date: Thu, 29 Jan 2026 14:29:51 +0500 Subject: [PATCH] Fix logging system: remove stderr bypass, thread-safe timestamps, and std::tolower UB - Remove std::cerr calls that bypassed --log-level (no more duplicate output) - Use localtime_r for thread-safe timestamp formatting - Fix std::tolower undefined behavior with unsigned char cast - Minor: add markdown language tag, standardize CLI option docs --- RAG_POC/INGEST_USAGE_GUIDE.md | 10 +++++----- RAG_POC/rag_ingest.cpp | 25 +++++++------------------ 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/RAG_POC/INGEST_USAGE_GUIDE.md b/RAG_POC/INGEST_USAGE_GUIDE.md index 9c4973eda..60b99cd7a 100644 --- a/RAG_POC/INGEST_USAGE_GUIDE.md +++ b/RAG_POC/INGEST_USAGE_GUIDE.md @@ -125,16 +125,16 @@ Control log verbosity with `--log-level` (available for all commands): **Examples:** ```bash # Minimal output (errors only) -./rag_ingest ingest --log-level=error -h 127.0.0.1 -P 6030 -u root -p root -D rag_db +./rag_ingest ingest --log-level=error --host=127.0.0.1 --port=6030 --user=root --password=root --database=rag_db # Default (info level) -./rag_ingest ingest -h 127.0.0.1 -P 6030 -u root -p root -D rag_db +./rag_ingest ingest --host=127.0.0.1 --port=6030 --user=root --password=root --database=rag_db # Detailed debugging -./rag_ingest ingest --log-level=debug -h 127.0.0.1 -P 6030 -u root -p root -D rag_db +./rag_ingest ingest --log-level=debug --host=127.0.0.1 --port=6030 --user=root --password=root --database=rag_db # Maximum verbosity -./rag_ingest ingest --log-level=trace -h 127.0.0.1 -P 6030 -u root -p root -D rag_db +./rag_ingest ingest --log-level=trace --host=127.0.0.1 --port=6030 --user=root --password=root --database=rag_db ``` **Output Format:** @@ -449,7 +449,7 @@ The tool tracks the last processed primary key value in `rag_sync_state`. 
Subseq Each data source is processed in its own transaction: -``` +```text Source 1: BEGIN IMMEDIATE → ingest data → COMMIT ✅ Source 2: BEGIN IMMEDIATE → ingest data → ROLLBACK ❌ (error occurred) Source 3: BEGIN IMMEDIATE → ingest data → COMMIT ✅ diff --git a/RAG_POC/rag_ingest.cpp b/RAG_POC/rag_ingest.cpp index e192a59a4..fd64ae173 100644 --- a/RAG_POC/rag_ingest.cpp +++ b/RAG_POC/rag_ingest.cpp @@ -122,6 +122,7 @@ #include #include #include +#include #include #include @@ -206,7 +207,9 @@ struct Logger { auto now = std::chrono::system_clock::now(); auto time = std::chrono::system_clock::to_time_t(now); char time_buf[64]; - std::strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S", std::localtime(&time)); + struct tm timeinfo; + localtime_r(&time, &timeinfo); + std::strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S", &timeinfo); out << "[" << time_buf << "] "; } @@ -1226,8 +1229,6 @@ struct OpenAIEmbeddingProvider : public EmbeddingProvider { ", inputs=" + std::to_string(inputs.size()) + ", dim=" + std::to_string(dim)); - std::cerr << " Calling OpenAI API: " << url << " (model=" << model << ", chunks=" << inputs.size() << ")\n"; - CURL* curl = curl_easy_init(); if (!curl) throw std::runtime_error("curl_easy_init failed"); @@ -1523,7 +1524,6 @@ static size_t flush_embedding_batch(std::vector& pending, if (pending.empty()) return 0; g_logger.info(std::string("Generating embeddings for batch of ") + std::to_string(pending.size()) + " chunks..."); - std::cerr << " Generating embeddings for batch of " << pending.size() << " chunks...\n"; g_logger.trace("Building input texts for embedding batch..."); std::vector inputs; @@ -1545,7 +1545,6 @@ static size_t flush_embedding_batch(std::vector& pending, size_t count = pending.size(); pending.clear(); g_logger.info(std::string("Successfully stored ") + std::to_string(count) + " embeddings"); - std::cerr << " Successfully stored " << count << " embeddings\n"; return count; } @@ -1558,14 +1557,9 @@ static void 
ingest_source(MySQLDB& db, const RagSource& src) { ", name=" + src.name + ", backend=" + src.backend_type + ", table=" + src.table_name + " ==="); - std::cerr << "Ingesting source_id=" << src.source_id - << " name=" << src.name - << " backend=" << src.backend_type - << " table=" << src.table_name << "\n"; if (src.backend_type != "mysql") { g_logger.warn(std::string("Skipping source ") + src.name + ": backend_type '" + src.backend_type + "' not supported"); - std::cerr << " Skipping: backend_type not supported in v0.\n"; return; } @@ -1718,8 +1712,6 @@ static void ingest_source(MySQLDB& db, const RagSource& src) { g_logger.info(std::string("Progress: ingested_docs=") + std::to_string(ingested_docs) + ", skipped_docs=" + std::to_string(skipped_docs) + ", chunks=" + std::to_string(total_chunks)); - std::cerr << " progress: ingested_docs=" << ingested_docs - << " skipped_docs=" << skipped_docs << "\n"; } } @@ -1753,10 +1745,6 @@ static void ingest_source(MySQLDB& db, const RagSource& src) { if (ecfg.enabled) { g_logger.info(std::string(" embedding_batches=") + std::to_string(embedding_batches)); } - - std::cerr << "Done source " << src.name - << " ingested_docs=" << ingested_docs - << " skipped_docs=" << skipped_docs << "\n"; } // =========================================================================== @@ -1915,7 +1903,6 @@ static bool init_schema(MySQLDB& db, int vec_dim = 1536) { << ")"; if (!db.try_execute(vec_sql.str().c_str())) { g_logger.warn("vec0 table creation failed (sqlite-vec extension not available). Vector embeddings will be disabled."); - std::cerr << "Warning: vec0 table creation failed (sqlite-vec extension not available). 
Vector embeddings will be disabled.\n"; } else { g_logger.trace("rag_vec_chunks vec0 table created"); } @@ -1991,7 +1978,9 @@ struct ConnParams { */ static LogLevel parse_log_level(const std::string& level_str) { std::string lower = level_str; - for (char& c : lower) c = std::tolower(c); + for (char& c : lower) { + c = static_cast<char>(std::tolower(static_cast<unsigned char>(c))); + } if (lower == "error") return LogLevel::ERROR; if (lower == "warn" || lower == "warning") return LogLevel::WARN;