-- sqlite-rembed Examples and Demonstration
-- This SQL file demonstrates the usage of sqlite-rembed integration in ProxySQL
-- Connect to ProxySQL SQLite3 server on port 6030 and run these examples:
--   mysql -h 127.0.0.1 -P 6030 -u root -proot --force < sqlite-rembed-examples.sql
--
-- --force is required because Phase 7 intentionally triggers an error: without
-- it the mysql client stops at the first failing statement, and the remaining
-- statements (including the Phase 8 cleanup) never run.
--
-- IMPORTANT: Replace YOUR_API_KEY with your actual API key in Phase 2
--
-- Generated: 2025-12-23

--------------------------------------------------------------------
-- Cleanup: Remove any existing demonstration tables
--------------------------------------------------------------------
-- Makes the script re-runnable after a previous, partially completed run.
DROP TABLE IF EXISTS demo_documents;
DROP TABLE IF EXISTS demo_embeddings;

--------------------------------------------------------------------
-- Phase 1: Basic Connectivity and Function Verification
--------------------------------------------------------------------
-- Verify basic connectivity and confirm sqlite-rembed functions are registered
SELECT 'Phase 1: Basic Connectivity' AS phase;

-- Basic ProxySQL connectivity test
SELECT 1 AS connectivity_test;

-- Available databases
SHOW DATABASES;

-- Available sqlite-vec functions (ordered so the 5-row sample is deterministic)
SELECT name
FROM pragma_function_list
WHERE name LIKE 'vec%'
ORDER BY name
LIMIT 5;

-- Available sqlite-rembed functions
SELECT name
FROM pragma_function_list
WHERE name LIKE 'rembed%'
ORDER BY name;

-- Check temp.rembed_clients virtual table exists
-- NOTE(review): if rembed_clients is registered in the temp schema or as an
-- eponymous virtual table it may not be listed in sqlite_master, in which case
-- an empty result here does not mean the table is missing -- confirm.
SELECT name
FROM sqlite_master
WHERE name = 'rembed_clients'
  AND type = 'table';

--------------------------------------------------------------------
-- Phase 2: Client Configuration
--------------------------------------------------------------------
-- Configure an embedding API client using temp.rembed_clients table
-- Note: temp.rembed_clients is per-connection, so client must be registered
-- in the same session where embeddings are generated
SELECT 'Phase 2: Client Configuration' AS phase;

-- Create embedding API client using synthetic OpenAI endpoint
-- Replace with your own API credentials for production use
-- IMPORTANT: Replace YOUR_API_KEY with your actual API key
INSERT INTO temp.rembed_clients(name, options)
VALUES (
    'demo-client',
    rembed_client_options(
        'format', 'openai',
        'url', 'https://api.synthetic.new/openai/v1/embeddings',
        'key', 'YOUR_API_KEY',  -- Replace with your actual API key
        'model', 'hf:nomic-ai/nomic-embed-text-v1.5'
    )
);

-- Verify client registration
SELECT name
FROM temp.rembed_clients;

-- View client configuration details
SELECT
    name,
    json_extract(options, '$.format') AS format,
    json_extract(options, '$.model') AS model
FROM temp.rembed_clients;

--------------------------------------------------------------------
-- Phase 3: Embedding Generation
--------------------------------------------------------------------
-- Generate text embeddings using the rembed() function
-- Embeddings are generated via HTTP request to the configured API endpoint
SELECT 'Phase 3: Embedding Generation' AS phase;

-- Generate embedding for 'Hello world' and check size
-- (768 dimensions x 4 bytes = 3072 bytes)
SELECT length(rembed('demo-client', 'Hello world')) AS embedding_size_bytes;

-- Generate embedding for longer technical text
SELECT length(rembed('demo-client', 'Machine learning algorithms improve with more training data and computational power.')) AS embedding_size_bytes;

-- Generate embedding for empty text (edge case)
SELECT length(rembed('demo-client', '')) AS empty_embedding_size;

--------------------------------------------------------------------
-- Phase 4: Table Creation and Data Storage
--------------------------------------------------------------------
-- Create regular tables for document storage and virtual vector tables
-- for embedding storage using sqlite-vec
SELECT 'Phase 4: Table Creation and Data Storage' AS phase;

-- Create regular table for document storage
CREATE TABLE demo_documents (
    id INTEGER PRIMARY KEY,
    title TEXT NOT NULL,
    content TEXT NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Create virtual vector table for embeddings with 768 dimensions
-- (must match the dimensionality of the embeddings stored in Phase 5)
CREATE VIRTUAL TABLE demo_embeddings USING vec0(
    embedding float[768]
);

-- Insert sample documents with diverse content
INSERT INTO demo_documents (id, title, content)
VALUES
    (1, 'Machine Learning', 'Machine learning algorithms improve with more training data and computational power.'),
    (2, 'Database Systems', 'Database management systems efficiently store, retrieve, and manipulate structured data.'),
    (3, 'Artificial Intelligence', 'AI enables computers to perform tasks typically requiring human intelligence.'),
    (4, 'Vector Databases', 'Vector databases enable similarity search for embeddings generated by machine learning models.');

-- Verify document insertion
SELECT
    id,
    title,
    length(content) AS content_length
FROM demo_documents;

--------------------------------------------------------------------
-- Phase 5: Embedding Generation and Storage
--------------------------------------------------------------------
-- Generate embeddings for all documents and store them in the vector table
-- for similarity search
SELECT 'Phase 5: Embedding Generation and Storage' AS phase;

-- Generate and store embeddings for all documents
-- The vector rowid is set to the document id so Phase 6 can join on it.
INSERT INTO demo_embeddings(rowid, embedding)
SELECT
    id,
    rembed('demo-client', content)
FROM demo_documents;

-- Verify embedding count (should be 4)
SELECT COUNT(*) AS total_embeddings
FROM demo_embeddings;

-- Check embedding storage format (should be 3072 bytes each)
SELECT
    rowid,
    length(embedding) AS embedding_size_bytes
FROM demo_embeddings
ORDER BY rowid
LIMIT 2;

--------------------------------------------------------------------
-- Phase 6: Similarity Search
--------------------------------------------------------------------
-- Perform similarity search using the stored embeddings
-- sqlite-vec requires either LIMIT or 'k = ?' constraint on KNN queries
-- Note: When using JOIN, the LIMIT must be in a subquery for vec0 to recognize it
SELECT 'Phase 6: Similarity Search' AS phase;

-- Direct vector table query: Search for similar embeddings
-- Returns rowid and distance for the 3 closest matches
SELECT
    rowid,
    distance
FROM demo_embeddings
WHERE embedding MATCH rembed('demo-client', 'data science and algorithms')
ORDER BY distance ASC
LIMIT 3;

-- Similarity search with JOIN using subquery
-- First find similar embeddings in subquery with LIMIT, then JOIN with documents.
-- The outer ORDER BY is needed because the subquery's ordering is not
-- guaranteed to survive the join.
SELECT
    d.title,
    d.content,
    e.distance
FROM (
    SELECT
        rowid,
        distance
    FROM demo_embeddings
    WHERE embedding MATCH rembed('demo-client', 'artificial intelligence and neural networks')
    ORDER BY distance ASC
    LIMIT 3
) AS e
INNER JOIN demo_documents AS d
    ON e.rowid = d.id
ORDER BY e.distance ASC;

-- Exact self-match: Search for a document using its own exact text
-- Should return distance close to 0.0 for the exact match
-- (may not be exactly 0 due to floating point)
SELECT
    d.title,
    e.distance
FROM (
    SELECT
        rowid,
        distance
    FROM demo_embeddings
    WHERE embedding MATCH rembed('demo-client', 'Machine learning algorithms improve with more training data and computational power.')
    ORDER BY distance ASC
    LIMIT 3
) AS e
INNER JOIN demo_documents AS d
    ON e.rowid = d.id
ORDER BY e.distance ASC;

--------------------------------------------------------------------
-- Phase 7: Edge Cases and Error Handling
--------------------------------------------------------------------
-- Demonstrate error handling and edge cases
SELECT 'Phase 7: Edge Cases and Error Handling' AS phase;

-- Error: Non-existent client
-- This statement is EXPECTED to fail; run the script with --force so that the
-- mysql client continues with the statements below.
SELECT rembed('non-existent-client', 'test text');

-- Very long text input
-- hex(zeroblob(N)) yields N copies of '00'; replacing each with 'A' builds a
-- string of exactly N 'A' characters without a multi-thousand-character
-- literal that is easy to corrupt when the file is re-wrapped.
-- NOTE(review): the original literal was several thousand 'A' characters;
-- 5000 is used here -- adjust if a specific length matters.
SELECT rembed('demo-client', replace(hex(zeroblob(5000)), '00', 'A'));

--------------------------------------------------------------------
-- Phase 8: Cleanup
--------------------------------------------------------------------
-- Clean up demonstration tables
SELECT 'Phase 8: Cleanup' AS phase;

DROP TABLE IF EXISTS demo_documents;
DROP TABLE IF EXISTS demo_embeddings;

--------------------------------------------------------------------
-- Summary
--------------------------------------------------------------------
-- These rows are printed unconditionally; they do not verify that the
-- preceding statements actually succeeded.
SELECT 'Demonstration Complete' AS phase;
SELECT 'All sqlite-rembed integration examples have been executed successfully.' AS summary;
SELECT 'The demonstration covered:' AS coverage;
SELECT ' • Client configuration with temp.rembed_clients' AS item;
SELECT ' • Embedding generation via HTTP API' AS item;
SELECT ' • Vector table creation and data storage' AS item;
SELECT ' • Similarity search with generated embeddings' AS item;
SELECT ' • Error handling and edge cases' AS item;
SELECT ' ' AS blank;
SELECT 'These examples can be used as a baseline for building applications' AS usage;
SELECT 'that leverage sqlite-rembed and sqlite-vec in ProxySQL.' AS usage_cont;