You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
proxysql/scripts/copy_stackexchange_Posts_my...

183 lines
5.6 KiB

#!/usr/bin/env python3
"""
Copy Posts table from MySQL to ProxySQL SQLite3 server.
Uses Python MySQL connectors for direct database access.
"""
import mysql.connector
import sys
import time
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Keyword arguments passed to mysql.connector.connect() for the MySQL source.
SOURCE_CONFIG = dict(
    host="127.0.0.1",
    port=3306,
    user="stackexchange",
    password="my-password",
    database="stackexchange",
    use_pure=True,
    ssl_disabled=True,
)

# Keyword arguments passed to mysql.connector.connect() for the destination
# (the ProxySQL SQLite3 server).
DEST_CONFIG = dict(
    host="127.0.0.1",
    port=6030,
    user="root",
    password="root",
    database="main",
    use_pure=True,
    ssl_disabled=True,
)

# Table copied from source to destination (same name on both sides).
TABLE_NAME = "Posts"

# Maximum number of rows to copy; 0 copies every row (non-zero is for testing).
LIMIT = 0

# Rows fetched from the source per fetchmany() call / per destination commit.
BATCH_SIZE = 5000

# When true, existing destination rows are deleted before the copy starts.
CLEAR_TABLE_FIRST = True

# Column list used for the destination INSERT statements.
COLUMNS = [
    "SiteId",
    "Id",
    "PostTypeId",
    "AcceptedAnswerId",
    "ParentId",
    "CreationDate",
    "DeletionDate",
    "Score",
    "ViewCount",
    "Body",
    "OwnerUserId",
    "OwnerDisplayName",
    "LastEditorUserId",
    "LastEditorDisplayName",
    "LastEditDate",
    "LastActivityDate",
    "Title",
    "Tags",
    "AnswerCount",
    "CommentCount",
    "FavoriteCount",
    "ClosedDate",
    "CommunityOwnedDate",
    "ContentLicense",
]
def escape_sql_value(value):
    """Render a Python value as a SQL literal for a string-built INSERT.

    Args:
        value: A single column value as returned by the source cursor.

    Returns:
        ``"NULL"`` for ``None``. Otherwise the value's text wrapped in single
        quotes, with embedded single quotes doubled. ``bytes``/``bytearray``
        values are decoded as UTF-8 first; if they are not valid UTF-8 they
        are emitted as a hex BLOB literal (``X'..'``) so no data is lost.
    """
    if value is None:
        return "NULL"

    if isinstance(value, (bytes, bytearray)):
        # str() on binary data would yield the Python repr ("b'...'" or
        # "bytearray(b'...')"), which would be stored as garbage text.
        try:
            text = value.decode("utf-8")
        except UnicodeDecodeError:
            # Hex digits never contain a quote, so no escaping is needed.
            return f"X'{value.hex()}'"
    else:
        text = str(value)

    # Escape single quotes by doubling them.
    escaped = text.replace("'", "''")
    return f"'{escaped}'"
def generate_insert(row):
    """Build the one-row INSERT statement for ``row``.

    Every value of ``row`` is turned into a SQL literal with
    ``escape_sql_value``; the statement targets ``TABLE_NAME`` and names the
    columns listed in ``COLUMNS``.
    """
    literals = [escape_sql_value(item) for item in row]
    values_str = ", ".join(literals)
    columns_str = ", ".join(COLUMNS)
    return f"INSERT INTO {TABLE_NAME} ({columns_str}) VALUES ({values_str})"
def _close_quietly(label, resource):
    """Close a cursor/connection, printing a warning instead of raising."""
    try:
        resource.close()
    except Exception as e:
        print(f"  Warning: failed to close {label}: {e}")


def _copy_rows(source_cursor, dest_cursor, dest_conn, start):
    """Stream rows from the source cursor into the destination, batch by batch.

    Returns a ``(rows, errors)`` tuple: rows inserted and rows that failed.
    """
    rows = 0
    errors = 0
    seen = 0  # position of the current row in the source result set
    last_report = start

    while True:
        batch = source_cursor.fetchmany(BATCH_SIZE)
        if not batch:
            break

        for row in batch:
            seen += 1
            # Reset per row so the error handler never reads a stale or
            # unbound statement when generate_insert() itself fails.
            insert_sql = None
            try:
                insert_sql = generate_insert(row)
                dest_cursor.execute(insert_sql)
                rows += 1
            except Exception as e:
                # Best effort: a bad row is counted and skipped, not fatal.
                errors += 1
                if errors <= 3:
                    print(f"Error inserting row {seen}: {e}")
                if errors == 1 and insert_sql is not None:
                    print(f"  Sample INSERT (first 300 chars): {insert_sql[:300]}...")

        # Commit batch
        dest_conn.commit()

        # Progress reporting (checked once per batch): on a multiple of
        # 1000 copied rows or when 5 seconds passed since the last report.
        now = time.time()
        if rows % 1000 == 0 or (now - last_report) >= 5:
            elapsed = now - start
            rate = rows / elapsed if elapsed > 0 else 0
            print(f"  Processed {rows} rows ({rate:.1f} rows/sec)")
            last_report = now

    return rows, errors


def _verify_counts(source_cursor, dest_cursor, rows):
    """Compare the expected row count with the destination's COUNT(*)."""
    if LIMIT > 0:
        expected = min(LIMIT, rows)
    else:
        source_cursor.execute(f"SELECT COUNT(*) FROM {TABLE_NAME}")
        expected = source_cursor.fetchone()[0]

    dest_cursor.execute(f"SELECT COUNT(*) FROM {TABLE_NAME}")
    actual = dest_cursor.fetchone()[0]

    print("\n✓ Verification:")
    print(f"  Expected rows: {expected}")
    print(f"  Actual rows: {actual}")
    if expected == actual:
        print("  ✓ Counts match!")
    else:
        print("  ✗ Count mismatch!")


def main():
    """Copy TABLE_NAME from the MySQL source to the SQLite3 server destination.

    Steps: connect to both servers, optionally clear the destination table
    (CLEAR_TABLE_FIRST), stream the source rows in batches of BATCH_SIZE,
    insert them one statement per row, print a summary and, when no row
    failed, compare row counts.

    Exits the process with status 1 when a connection cannot be opened or
    an error outside the per-row insert handling occurs. Individual row
    failures are counted and reported but do not abort the copy.
    """
    print(f"Copying {TABLE_NAME} from MySQL to SQLite3 server...")
    print(f"Source: {SOURCE_CONFIG['host']}:{SOURCE_CONFIG['port']}")
    print(f"Destination: {DEST_CONFIG['host']}:{DEST_CONFIG['port']}")
    if LIMIT > 0:
        print(f"Limit: {LIMIT} rows")
    else:
        print("Copying all rows")

    # Connect to source (MySQL)
    try:
        source_conn = mysql.connector.connect(**SOURCE_CONFIG)
        source_cursor = source_conn.cursor()
        print("✓ Connected to MySQL source")
    except Exception as e:
        print(f"✗ Failed to connect to source MySQL: {e}")
        sys.exit(1)

    # Connect to destination (ProxySQL SQLite3 server)
    try:
        dest_conn = mysql.connector.connect(**DEST_CONFIG)
        dest_cursor = dest_conn.cursor()
        print("✓ Connected to SQLite3 server destination")
    except Exception as e:
        print(f"✗ Failed to connect to destination SQLite3 server: {e}")
        _close_quietly("source connection", source_conn)
        sys.exit(1)

    try:
        # Clear destination table if requested
        if CLEAR_TABLE_FIRST:
            print("Clearing destination table...")
            dest_cursor.execute(f"DELETE FROM {TABLE_NAME}")
            dest_conn.commit()
            print("✓ Destination table cleared")

        # Select the columns by name, in the same order the INSERT lists
        # them, so the copy does not depend on the source's physical column
        # order (which "SELECT *" would silently rely on).
        columns_str = ", ".join(COLUMNS)
        query = f"SELECT {columns_str} FROM {TABLE_NAME}"
        if LIMIT > 0:
            query += f" LIMIT {LIMIT}"
        print(f"Executing query: {query}")
        source_cursor.execute(query)

        start = time.time()

        # Fetch and insert rows
        print("Starting copy...")
        rows, errors = _copy_rows(source_cursor, dest_cursor, dest_conn, start)

        # Final commit
        dest_conn.commit()

        elapsed = time.time() - start
        print("\n✓ Copy completed:")
        print(f"  Rows copied: {rows}")
        print(f"  Errors: {errors}")
        print(f"  Time: {elapsed:.1f}s")
        if elapsed > 0:
            print(f"  Rate: {rows/elapsed:.1f} rows/sec")

        # Verify counts if no errors
        if errors == 0:
            _verify_counts(source_cursor, dest_cursor, rows)

    except Exception as e:
        print(f"\n✗ Error during copy: {e}")
        sys.exit(1)
    finally:
        # Cleanup: close each resource independently so one failing close()
        # neither skips the others nor replaces the pending exit status.
        _close_quietly("source cursor", source_cursor)
        _close_quietly("source connection", source_conn)
        _close_quietly("destination cursor", dest_cursor)
        _close_quietly("destination connection", dest_conn)
        print("\nConnections closed.")
# Run the copy only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()