SET chunking_json='{"enabled":false,"unit":"chars","chunk_size":4000,"overlap":400,"min_chunk_size":800}'
UPDATE rag_sources
SET chunking_json='${chunking_json_value}'
WHERE source_id=1;
UPDATE rag_sources
SET embedding_json='{"enabled":false}'
@ -79,6 +88,28 @@ assert_eq() {
echo"OK: ${label} = ${actual}"
}
# fts_count DB QUERY
# Print the number of rag_fts_chunks rows that match a full-text query.
#   $1 - forwarded unchanged as run_sqlite's first argument
#        (presumably the database file -- confirm against run_sqlite)
#   $2 - full-text MATCH expression; embedded in the statement as a SQL string literal
fts_count() {
  local db="$1"
  local q="$2"
  # Double every single quote so the query text cannot close the SQL literal early.
  q="$(printf '%s' "${q}" | sed "s/'/''/g")"
  # Database and statement are passed to run_sqlite as two separate arguments.
  run_sqlite "${db}" "SELECT COUNT(*) FROM rag_fts_chunks WHERE rag_fts_chunks MATCH '${q}';"
}
# fts_bm25_top DB QUERY
# Print the chunk_id of the first rag_fts_chunks row matching a full-text query
# when the matches are sorted by bm25(rag_fts_chunks) in ascending order
# (FTS5's bm25() returns smaller values for better matches).
#   $1 - forwarded unchanged as run_sqlite's first argument
#        (presumably the database file -- confirm against run_sqlite)
#   $2 - full-text MATCH expression; embedded in the statement as a SQL string literal
fts_bm25_top() {
  local db="$1"
  local q="$2"
  # Double every single quote so the query text cannot close the SQL literal early.
  q="$(printf '%s' "${q}" | sed "s/'/''/g")"
  # Database and statement are passed to run_sqlite as two separate arguments.
  run_sqlite "${db}" "SELECT chunk_id FROM rag_fts_chunks WHERE rag_fts_chunks MATCH '${q}' ORDER BY bm25(rag_fts_chunks) LIMIT 1;"
}
print_samples(){
localdb="$1"
echo"==> Sample rag_documents"
run_sqlite "${db}""SELECT doc_id, source_id, substr(title,1,40) AS title, json_extract(metadata_json,'$.Score') AS score FROM rag_documents ORDER BY doc_id LIMIT 5;"
echo"==> Sample rag_chunks"
run_sqlite "${db}""SELECT chunk_id, doc_id, chunk_index, substr(body,1,50) AS body FROM rag_chunks ORDER BY chunk_id LIMIT 5;"
echo"==> Sample rag_fts_chunks matches for 'ProxySQL'"
run_sqlite "${db}""SELECT chunk_id, substr(title,1,40) AS title FROM rag_fts_chunks WHERE rag_fts_chunks MATCH 'ProxySQL' ORDER BY chunk_id LIMIT 5;"