You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
proxysql/doc/sqlite-rembed-examples.sql

218 lines
13 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

-- sqlite-rembed Examples and Demonstration
-- This SQL file demonstrates the usage of sqlite-rembed integration in ProxySQL
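-- Connect to the ProxySQL SQLite3 server on port 6030 and run these examples.
-- Phase 7 triggers an error on purpose, and the mysql client stops at the
-- first error when a script is piped in, so pass --force to let the script
-- continue through cleanup and the summary:
-- mysql --force -h 127.0.0.1 -P 6030 -u root -proot < sqlite-rembed-examples.sql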
--
-- IMPORTANT: Replace YOUR_API_KEY with your actual API key in Phase 2
--
-- Generated: 2025-12-23
--------------------------------------------------------------------
-- Cleanup: start from a clean slate
--------------------------------------------------------------------
-- Drop tables left behind by an earlier run so that the CREATE statements
-- in Phase 4 do not fail. IF EXISTS keeps the very first run error-free.
DROP TABLE IF EXISTS demo_embeddings;
DROP TABLE IF EXISTS demo_documents;
--------------------------------------------------------------------
-- Phase 1: Basic Connectivity and Function Verification
--------------------------------------------------------------------
-- Verify basic connectivity and confirm that the sqlite-vec and
-- sqlite-rembed functions are registered
SELECT 'Phase 1: Basic Connectivity' AS phase;
-- Basic ProxySQL connectivity test
SELECT 1 AS connectivity_test;
-- Available databases
SHOW DATABASES;
-- Available sqlite-vec functions: the first five by name.
-- ORDER BY makes the sample deterministic; LIMIT on its own would return
-- an arbitrary five rows.
SELECT name
FROM pragma_function_list
WHERE name LIKE 'vec%'
ORDER BY name
LIMIT 5;
-- Available sqlite-rembed functions
SELECT name
FROM pragma_function_list
WHERE name LIKE 'rembed%'
ORDER BY name;
-- Look for a 'table' entry named rembed_clients in sqlite_master.
-- NOTE(review): sqlite_master describes the main schema only. If
-- rembed_clients exists solely in the temp schema, or is an eponymous
-- virtual table, this query may return no rows even though
-- temp.rembed_clients is usable -- confirm; pragma_module_list is a
-- possible alternative check.
SELECT name
FROM sqlite_master
WHERE name = 'rembed_clients'
  AND type = 'table';
--------------------------------------------------------------------
-- Phase 2: Client Configuration
--------------------------------------------------------------------
-- Register an embedding API client through the temp.rembed_clients table.
-- Note: temp.rembed_clients is per-connection, so the client has to be
-- registered in the same session that later generates embeddings
SELECT 'Phase 2: Client Configuration' AS phase;
-- Register 'demo-client' against the synthetic OpenAI-format endpoint.
-- IMPORTANT: replace YOUR_API_KEY with your actual API key, and use your
-- own endpoint and credentials for production use
INSERT INTO temp.rembed_clients(name, options)
VALUES (
    'demo-client',
    rembed_client_options(
        'format', 'openai',
        'url', 'https://api.synthetic.new/openai/v1/embeddings',
        'key', 'YOUR_API_KEY',
        'model', 'hf:nomic-ai/nomic-embed-text-v1.5'
    )
);
-- Confirm that the client is registered
SELECT name
FROM temp.rembed_clients;
-- Show the format and model stored in each client's options
SELECT
    name,
    json_extract(options, '$.format') AS format,
    json_extract(options, '$.model') AS model
FROM temp.rembed_clients;
--------------------------------------------------------------------
-- Phase 3: Embedding Generation
--------------------------------------------------------------------
-- Produce text embeddings with rembed(); each embedding is obtained through
-- an HTTP request to the API endpoint configured for the named client
SELECT 'Phase 3: Embedding Generation' AS phase;
-- Short input: report the embedding size in bytes
-- (expected: 768 dimensions x 4 bytes = 3072 bytes)
SELECT length(rembed('demo-client', 'Hello world')) AS embedding_size_bytes;
-- Longer technical sentence
SELECT length(
    rembed(
        'demo-client',
        'Machine learning algorithms improve with more training data and computational power.'
    )
) AS embedding_size_bytes;
-- Edge case: empty string as input
SELECT length(rembed('demo-client', '')) AS empty_embedding_size;
--------------------------------------------------------------------
-- Phase 4: Table Creation and Data Storage
--------------------------------------------------------------------
-- Set up an ordinary table that holds the documents and a sqlite-vec
-- virtual table that holds their embeddings
SELECT 'Phase 4: Table Creation and Data Storage' AS phase;
-- Ordinary table: one row per document
CREATE TABLE demo_documents (
    id         INTEGER PRIMARY KEY,
    title      TEXT NOT NULL,
    content    TEXT NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- vec0 virtual table: one 768-dimension float vector per row
CREATE VIRTUAL TABLE demo_embeddings USING vec0(
    embedding float[768]
);
-- Four sample documents on different topics
INSERT INTO demo_documents (id, title, content)
VALUES
    (1, 'Machine Learning', 'Machine learning algorithms improve with more training data and computational power.'),
    (2, 'Database Systems', 'Database management systems efficiently store, retrieve, and manipulate structured data.'),
    (3, 'Artificial Intelligence', 'AI enables computers to perform tasks typically requiring human intelligence.'),
    (4, 'Vector Databases', 'Vector databases enable similarity search for embeddings generated by machine learning models.');
-- Confirm that the documents were inserted
SELECT
    id,
    title,
    length(content) AS content_length
FROM demo_documents;
--------------------------------------------------------------------
-- Phase 5: Embedding Generation and Storage
--------------------------------------------------------------------
-- Generate an embedding for every document and store it in the vector
-- table so that it can be used for similarity search
SELECT 'Phase 5: Embedding Generation and Storage' AS phase;
-- One rembed() call per document; the document id is reused as the rowid
-- of the vector row, which is what Phase 6 joins on
INSERT INTO demo_embeddings(rowid, embedding)
SELECT
    id,
    rembed('demo-client', content)
FROM demo_documents;
-- Verify embedding count (should be 4)
SELECT COUNT(*) AS total_embeddings
FROM demo_embeddings;
-- Check embedding storage format (should be 3072 bytes each).
-- ORDER BY rowid pins which two rows are sampled; LIMIT alone would
-- return an arbitrary pair.
SELECT
    rowid,
    length(embedding) AS embedding_size_bytes
FROM demo_embeddings
ORDER BY rowid
LIMIT 2;
--------------------------------------------------------------------
-- Phase 6: Similarity Search
--------------------------------------------------------------------
-- Perform similarity search using the stored embeddings
-- sqlite-vec requires either LIMIT or 'k = ?' constraint on KNN queries
-- Note: When using JOIN, the LIMIT must be in a subquery for vec0 to recognize it
SELECT 'Phase 6: Similarity Search' AS phase;
-- Direct vector table query: search for similar embeddings
-- Returns rowid and distance for the 3 closest matches
SELECT
    rowid,
    distance
FROM demo_embeddings
WHERE embedding MATCH rembed('demo-client', 'data science and algorithms')
ORDER BY distance ASC
LIMIT 3;
-- Similarity search with JOIN using a subquery
-- The subquery finds the 3 nearest embeddings; the outer query attaches the
-- document columns. The outer ORDER BY is needed because the ordering inside
-- the subquery is not guaranteed to survive the join.
SELECT
    d.title,
    d.content,
    e.distance
FROM (
    SELECT
        rowid,
        distance
    FROM demo_embeddings
    WHERE embedding MATCH rembed('demo-client', 'artificial intelligence and neural networks')
    ORDER BY distance ASC
    LIMIT 3
) AS e
INNER JOIN demo_documents AS d
    ON e.rowid = d.id
ORDER BY e.distance ASC;
-- Exact self-match: search for a document using its own exact text
-- The exact match should come first with a distance close to 0.0 (it may not
-- be exactly 0 due to floating point); the outer ORDER BY keeps it first.
SELECT
    d.title,
    e.distance
FROM (
    SELECT
        rowid,
        distance
    FROM demo_embeddings
    WHERE embedding MATCH rembed('demo-client', 'Machine learning algorithms improve with more training data and computational power.')
    ORDER BY distance ASC
    LIMIT 3
) AS e
INNER JOIN demo_documents AS d
    ON e.rowid = d.id
ORDER BY e.distance ASC;
--------------------------------------------------------------------
-- Phase 7: Edge Cases and Error Handling
--------------------------------------------------------------------
-- Demonstrate error handling and edge cases
SELECT 'Phase 7: Edge Cases and Error Handling' AS phase;
-- Error: non-existent client. This script registers no client named
-- 'non-existent-client', so the call is expected to fail.
-- The mysql client aborts a piped script at the first error unless it is
-- started with --force.
SELECT rembed('non-existent-client', 'test text');
-- Long text input.
-- NOTE(review): the original long literal was missing, which left this
-- statement unterminated and turned it and the following statement into a
-- syntax error. The paragraph below is a stand-in; replace it with longer
-- text if a specific input size is being exercised.
-- length() reports the embedding size, as in Phase 3, instead of returning
-- the raw embedding value.
SELECT length(
    rembed(
        'demo-client',
        'Vector databases store high-dimensional embeddings produced by machine learning models and answer nearest-neighbour queries over them. Relational databases, in contrast, organise structured records into tables and answer exact-match and range queries through indexes. Combining the two lets an application keep its documents, metadata, and embeddings in one place, generate embeddings with a remote model when rows are inserted, and retrieve semantically related rows with a single SQL statement. This paragraph exists only to exercise the embedding function with an input that is considerably longer than the short phrases used elsewhere in this script.'
    )
) AS long_text_embedding_size;

--------------------------------------------------------------------
-- Phase 8: Cleanup
--------------------------------------------------------------------
-- Remove the tables created for the demonstration; IF EXISTS keeps this
-- step error-free when an earlier phase did not create them
SELECT 'Phase 8: Cleanup' AS phase;
DROP TABLE IF EXISTS demo_embeddings;
DROP TABLE IF EXISTS demo_documents;
--------------------------------------------------------------------
-- Summary
--------------------------------------------------------------------
-- Closing banner: each statement returns one fixed line of text
SELECT 'Demonstration Complete' AS phase;
SELECT 'All sqlite-rembed integration examples have been executed successfully.' AS summary;
-- List of the topics covered by the phases above
SELECT 'The demonstration covered:' AS coverage;
SELECT ' • Client configuration with temp.rembed_clients' AS item;
SELECT ' • Embedding generation via HTTP API' AS item;
SELECT ' • Vector table creation and data storage' AS item;
SELECT ' • Similarity search with generated embeddings' AS item;
SELECT ' • Error handling and edge cases' AS item;
-- Spacer row before the closing usage note
SELECT ' ' AS blank;
SELECT 'These examples can be used as a baseline for building applications' AS usage;
SELECT 'that leverage sqlite-rembed and sqlite-vec in ProxySQL.' AS usage_cont;