diff --git a/doc/SQLITE-REMBED-TEST-README.md b/doc/SQLITE-REMBED-TEST-README.md index a2a472227..6f93df8ef 100644 --- a/doc/SQLITE-REMBED-TEST-README.md +++ b/doc/SQLITE-REMBED-TEST-README.md @@ -110,12 +110,12 @@ MYSQL_PASS="root" ``` ### API Configuration -The test uses a synthetic OpenAI endpoint by default. Modify these variables to use your own API: +The test uses a synthetic OpenAI endpoint by default. Set `API_KEY` environment variable or modify the variable below to use your own API: ```bash API_CLIENT_NAME="test-client-$(date +%s)" API_FORMAT="openai" API_URL="https://api.synthetic.new/openai/v1/embeddings" -API_KEY="YOUR_API_KEY" # Replace with your actual API key +API_KEY="${API_KEY:-YOUR_API_KEY}" # Uses environment variable or placeholder API_MODEL="hf:nomic-ai/nomic-embed-text-v1.5" VECTOR_DIMENSIONS=768 ``` diff --git a/doc/sqlite-rembed-demo.sh b/doc/sqlite-rembed-demo.sh index f65656a07..014ca1c75 100755 --- a/doc/sqlite-rembed-demo.sh +++ b/doc/sqlite-rembed-demo.sh @@ -31,11 +31,11 @@ MYSQL_USER="root" MYSQL_PASS="root" # API Configuration - using synthetic OpenAI endpoint for demonstration -# IMPORTANT: Replace YOUR_API_KEY with your actual API key +# IMPORTANT: Set API_KEY environment variable or replace YOUR_API_KEY below API_CLIENT_NAME="demo-client-$(date +%s)" API_FORMAT="openai" API_URL="https://api.synthetic.new/openai/v1/embeddings" -API_KEY="YOUR_API_KEY" # Replace with your actual API key +API_KEY="${API_KEY:-YOUR_API_KEY}" # Uses environment variable or placeholder API_MODEL="hf:nomic-ai/nomic-embed-text-v1.5" VECTOR_DIMENSIONS=768 # Based on model output @@ -87,6 +87,13 @@ create_demo_sql() { -- ProxySQL: ${PROXYSQL_HOST}:${PROXYSQL_PORT} -- API Endpoint: ${API_URL} -------------------------------------------------------------------- +-- Cleanup: Remove any existing demonstration tables +DROP TABLE IF EXISTS demo_documents; +DROP TABLE IF EXISTS demo_embeddings; +DROP TABLE IF EXISTS demo_embeddings_info; +DROP TABLE IF EXISTS 
demo_embeddings_chunks; +DROP TABLE IF EXISTS demo_embeddings_rowids; +DROP TABLE IF EXISTS demo_embeddings_vector_chunks00; -------------------------------------------------------------------- -- Phase 1: Basic Connectivity and Function Verification @@ -196,7 +203,8 @@ SELECT id, title, length(content) as content_length FROM demo_documents; SELECT 'Phase 5: Embedding Generation and Storage' as phase; -- Generate and store embeddings for all documents -INSERT INTO demo_embeddings(rowid, embedding) +-- Using INSERT OR REPLACE to handle existing rows (cleanup should have removed them) +INSERT OR REPLACE INTO demo_embeddings(rowid, embedding) SELECT id, rembed('$API_CLIENT_NAME', content) FROM demo_documents; @@ -217,28 +225,40 @@ SELECT 'Phase 6: Similarity Search' as phase; -- Exact self-match (should have distance 0.0) SELECT d.title, d.content, e.distance -FROM demo_embeddings e -JOIN demo_documents d ON e.rowid = d.id -WHERE e.embedding MATCH rembed('$API_CLIENT_NAME', - 'Machine learning algorithms improve with more training data and computational power.') -LIMIT 3; +FROM ( + SELECT rowid, distance + FROM demo_embeddings + WHERE embedding MATCH rembed('$API_CLIENT_NAME', + 'Machine learning algorithms improve with more training data and computational power.') + LIMIT 3 +) e +JOIN demo_documents d ON e.rowid = d.id; + -- Similarity search with query text SELECT d.title, d.content, e.distance -FROM demo_embeddings e -JOIN demo_documents d ON e.rowid = d.id -WHERE e.embedding MATCH rembed('$API_CLIENT_NAME', +FROM ( + SELECT rowid, distance + FROM demo_embeddings + WHERE embedding MATCH rembed('$API_CLIENT_NAME', 'data science and algorithms') -LIMIT 3; + LIMIT 3 +) e +JOIN demo_documents d ON e.rowid = d.id; -- Ordered similarity search (closest matches first) +-- ORDER BY is applied in the KNN subquery and again after the JOIN: row order is undefined without it -SELECT d.title, e.distance -FROM demo_embeddings e -JOIN demo_documents d ON e.rowid = d.id -WHERE e.embedding MATCH rembed('$API_CLIENT_NAME', +SELECT d.title, d.content, e.distance +FROM ( + SELECT rowid, 
distance + FROM demo_embeddings + WHERE embedding MATCH rembed('$API_CLIENT_NAME', 'artificial intelligence and neural networks') -ORDER BY e.distance ASC -LIMIT 3; + ORDER BY distance + LIMIT 3 +) e +JOIN demo_documents d ON e.rowid = d.id +ORDER BY e.distance ASC; -------------------------------------------------------------------- -- Phase 7: Edge Cases and Error Handling @@ -348,4 +368,4 @@ main() { # Run main demonstration main -exit 0 \ No newline at end of file +exit 0 diff --git a/doc/sqlite-rembed-examples.sh b/doc/sqlite-rembed-examples.sh index a36972279..500f9edfc 100755 --- a/doc/sqlite-rembed-examples.sh +++ b/doc/sqlite-rembed-examples.sh @@ -30,11 +30,11 @@ MYSQL_USER="root" MYSQL_PASS="root" # API Configuration - using synthetic OpenAI endpoint for demonstration -# IMPORTANT: Replace YOUR_API_KEY with your actual API key +# IMPORTANT: Set API_KEY environment variable or replace YOUR_API_KEY below API_CLIENT_NAME="demo-client-$(date +%s)" API_FORMAT="openai" API_URL="https://api.synthetic.new/openai/v1/embeddings" -API_KEY="YOUR_API_KEY" # Replace with your actual API key +API_KEY="${API_KEY:-YOUR_API_KEY}" # Uses environment variable or placeholder API_MODEL="hf:nomic-ai/nomic-embed-text-v1.5" VECTOR_DIMENSIONS=768 # Based on model output diff --git a/doc/sqlite-rembed-test.sh b/doc/sqlite-rembed-test.sh index a1bb1ad4e..dac942dfc 100755 --- a/doc/sqlite-rembed-test.sh +++ b/doc/sqlite-rembed-test.sh @@ -41,11 +41,11 @@ MYSQL_USER="root" MYSQL_PASS="root" # API Configuration - using synthetic OpenAI endpoint for testing -# IMPORTANT: Replace YOUR_API_KEY with your actual API key +# IMPORTANT: Set API_KEY environment variable or replace YOUR_API_KEY below API_CLIENT_NAME="test-client-$(date +%s)" API_FORMAT="openai" API_URL="https://api.synthetic.new/openai/v1/embeddings" -API_KEY="YOUR_API_KEY" # Replace with your actual API key +API_KEY="${API_KEY:-YOUR_API_KEY}" # Uses environment variable or placeholder API_MODEL="hf:nomic-ai/nomic-embed-text-v1.5" VECTOR_DIMENSIONS=768 # Based on model 
output