Database Development
Schema design, optimization, and management best practices.
Schema Design
Normalization
-- 1NF: Atomic values, no repeating groups
-- BAD
CREATE TABLE orders (
id INT,
products VARCHAR(255) -- "shirt,pants,shoes" - NOT atomic
);
-- GOOD
CREATE TABLE orders (id INT PRIMARY KEY);
CREATE TABLE order_items (
order_id INT REFERENCES orders(id),
product_id INT REFERENCES products(id),
quantity INT
);
-- 2NF: No partial dependencies (all non-key columns depend on entire PK)
-- 3NF: No transitive dependencies (non-key columns don't depend on other non-key columns)
Data Types
-- Use appropriate types
CREATE TABLE users (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, -- BIGINT for large tables
uuid CHAR(36) NOT NULL UNIQUE, -- Fixed-length UUID
email VARCHAR(255) NOT NULL, -- Variable length
status ENUM('active', 'inactive', 'banned'), -- Constrained values
balance DECIMAL(10,2) NOT NULL DEFAULT 0, -- Exact precision for money
metadata JSON, -- Flexible schema
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
);
-- PostgreSQL specific
CREATE TABLE events (
id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
data JSONB NOT NULL, -- Binary JSON, indexable
tags TEXT[] NOT NULL DEFAULT '{}', -- Array type
tsv TSVECTOR, -- Full-text search
created_at TIMESTAMPTZ DEFAULT NOW() -- Timezone-aware
);
Relationships
-- One-to-Many
CREATE TABLE posts (
id SERIAL PRIMARY KEY,
user_id INT NOT NULL REFERENCES users(id) ON DELETE CASCADE,
title VARCHAR(255) NOT NULL
);
-- Many-to-Many with pivot table
CREATE TABLE post_tags (
post_id INT REFERENCES posts(id) ON DELETE CASCADE,
tag_id INT REFERENCES tags(id) ON DELETE CASCADE,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (post_id, tag_id)
);
-- One-to-One
CREATE TABLE user_profiles (
user_id INT PRIMARY KEY REFERENCES users(id) ON DELETE CASCADE,
bio TEXT,
avatar_url VARCHAR(255)
);
Indexing
Index Types
-- B-Tree (default, most common)
CREATE INDEX idx_users_email ON users(email);
-- Composite index (order matters!)
CREATE INDEX idx_orders_user_status ON orders(user_id, status);
-- Good for: WHERE user_id = ? AND status = ?
-- Good for: WHERE user_id = ?
-- NOT good for: WHERE status = ? (leftmost prefix rule)
-- Unique index
CREATE UNIQUE INDEX idx_users_email_unique ON users(email);
-- Partial index (PostgreSQL)
CREATE INDEX idx_active_users ON users(email) WHERE status = 'active';
-- Expression index
CREATE INDEX idx_users_email_lower ON users(LOWER(email));
-- Full-text index (MySQL)
CREATE FULLTEXT INDEX idx_posts_content ON posts(title, content);
-- GIN index for JSONB (PostgreSQL)
CREATE INDEX idx_events_data ON events USING GIN(data);
Index Strategy
-- Index columns used in:
-- 1. WHERE clauses
-- 2. JOIN conditions
-- 3. ORDER BY (if used frequently)
-- 4. Foreign keys
-- Check existing indexes
SHOW INDEX FROM orders; -- MySQL
\d orders -- PostgreSQL
-- Analyze query execution
EXPLAIN ANALYZE SELECT * FROM orders WHERE user_id = 123;
Query Optimization
EXPLAIN Analysis
-- MySQL
EXPLAIN SELECT * FROM orders
WHERE user_id = 123
AND created_at > '2024-01-01';
-- Look for:
-- type: "ref" or "range" (good), "ALL" (table scan, bad)
-- key: Which index is used (NULL = no index)
-- rows: Estimated rows examined
-- PostgreSQL
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT * FROM orders WHERE user_id = 123;
Common Optimizations
-- BAD: SELECT *
SELECT * FROM users WHERE id = 1;
-- GOOD: Select only needed columns
SELECT id, name, email FROM users WHERE id = 1;
-- BAD: OR can prevent index usage
SELECT * FROM users WHERE email = 'a@b.com' OR name = 'John';
-- GOOD: Use UNION for OR conditions
SELECT * FROM users WHERE email = 'a@b.com'
UNION ALL
SELECT * FROM users WHERE name = 'John' AND email != 'a@b.com';
-- BAD: Functions on indexed columns
SELECT * FROM users WHERE YEAR(created_at) = 2024;
-- GOOD: Use range
SELECT * FROM users
WHERE created_at >= '2024-01-01'
AND created_at < '2025-01-01';
-- BAD: LIKE with leading wildcard
SELECT * FROM products WHERE name LIKE '%shirt%';
-- GOOD: Full-text search
SELECT * FROM products
WHERE MATCH(name) AGAINST('shirt' IN BOOLEAN MODE);
N+1 Problem
-- BAD: N+1 queries
-- Query 1: SELECT * FROM posts LIMIT 10
-- Query 2-11: SELECT * FROM users WHERE id = ? (for each post)
-- GOOD: JOIN
SELECT p.*, u.name as author_name
FROM posts p
JOIN users u ON p.user_id = u.id
LIMIT 10;
-- GOOD: Subquery with IN
SELECT * FROM users
WHERE id IN (SELECT DISTINCT user_id FROM posts WHERE ...);
Migrations
Migration Best Practices
-- Always wrap in transactions
BEGIN;
-- Add column (non-locking in PostgreSQL)
ALTER TABLE users ADD COLUMN phone VARCHAR(20);
-- Add index concurrently (PostgreSQL, non-locking)
CREATE INDEX CONCURRENTLY idx_users_phone ON users(phone);
-- Rename column safely
ALTER TABLE users RENAME COLUMN phone TO phone_number;
COMMIT;
-- Rollback script
BEGIN;
ALTER TABLE users DROP COLUMN phone_number;
DROP INDEX idx_users_phone;
COMMIT;
Safe Migration Patterns
-- Adding NOT NULL column with default
-- Step 1: Add nullable column
ALTER TABLE users ADD COLUMN status VARCHAR(20);
-- Step 2: Backfill data
UPDATE users SET status = 'active' WHERE status IS NULL;
-- Step 3: Add NOT NULL constraint
ALTER TABLE users ALTER COLUMN status SET NOT NULL;
ALTER TABLE users ALTER COLUMN status SET DEFAULT 'active';
-- Renaming tables (zero downtime)
-- Step 1: Create new table
CREATE TABLE accounts (LIKE users INCLUDING ALL);
-- Step 2: Copy data
INSERT INTO accounts SELECT * FROM users;
-- Step 3: Create triggers for sync
-- Step 4: Switch application
-- Step 5: Drop old table
Performance
Connection Pooling
// Node.js with pg-pool
const { Pool } = require('pg');
const pool = new Pool({
host: 'localhost',
database: 'myapp',
max: 20, // Max connections
idleTimeoutMillis: 30000, // Close idle connections
connectionTimeoutMillis: 2000
});
// Always use pool, not direct connections
const result = await pool.query('SELECT * FROM users WHERE id = $1', [userId]);
Pagination
-- BAD: OFFSET for large datasets
SELECT * FROM posts ORDER BY created_at DESC LIMIT 20 OFFSET 10000;
-- Gets slower as offset increases
-- GOOD: Cursor-based pagination
SELECT * FROM posts
WHERE created_at < '2024-01-15 10:30:00'
ORDER BY created_at DESC
LIMIT 20;
-- GOOD: Keyset pagination with ID
SELECT * FROM posts
WHERE (created_at, id) < ('2024-01-15 10:30:00', 12345)
ORDER BY created_at DESC, id DESC
LIMIT 20;
Batch Operations
-- BAD: Many individual inserts
INSERT INTO logs (message) VALUES ('log1');
INSERT INTO logs (message) VALUES ('log2');
-- ... 1000 more
-- GOOD: Batch insert
INSERT INTO logs (message) VALUES
('log1'),
('log2'),
('log3');
-- Up to ~1000 at a time
-- GOOD: COPY for bulk loading (PostgreSQL)
COPY logs (message) FROM '/path/to/file.csv' WITH CSV;
Transactions
ACID Properties
-- Atomicity: All or nothing
BEGIN;
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;
-- If any fails, ROLLBACK
COMMIT;
-- Isolation levels
SET TRANSACTION ISOLATION LEVEL READ COMMITTED; -- Default
SET TRANSACTION ISOLATION LEVEL SERIALIZABLE; -- Strictest
-- Deadlock prevention: Always lock in same order
BEGIN;
SELECT * FROM accounts WHERE id = 1 FOR UPDATE; -- Lock row
SELECT * FROM accounts WHERE id = 2 FOR UPDATE;
-- Do work
COMMIT;
NoSQL Patterns
Document Database (MongoDB)
// Schema design: Embed vs Reference
// Embed: Data accessed together, 1:few relationships
{
_id: ObjectId("..."),
title: "Blog Post",
author: { // Embedded
name: "John",
email: "john@example.com"
},
comments: [ // Embedded array
{ text: "Great!", user: "Jane" }
]
}
// Reference: Large documents, many relationships
{
_id: ObjectId("..."),
title: "Blog Post",
author_id: ObjectId("...") // Reference to users collection
}
// Indexes
db.posts.createIndex({ "author_id": 1 });
db.posts.createIndex({ "title": "text", "content": "text" }); // Text search
Key-Value (Redis)
# Caching pattern
SET user:123 '{"name":"John"}' EX 3600 # Expires in 1 hour
GET user:123
# Counter
INCR page:views:homepage
GET page:views:homepage
# Rate limiting
INCR rate:ip:192.168.1.1
EXPIRE rate:ip:192.168.1.1 60 # Reset every minute
Backup & Recovery
# MySQL
mysqldump -u root -p database > backup.sql
mysql -u root -p database < backup.sql
# PostgreSQL
pg_dump -Fc database > backup.dump
pg_restore -d database backup.dump
# Point-in-time recovery (PostgreSQL)
# Requires WAL archiving configured
pg_basebackup -D /backup/base -Fp -Xs -P
Monitoring Queries
-- MySQL slow query log
SET GLOBAL slow_query_log = 'ON';
SET GLOBAL long_query_time = 1; -- Log queries > 1 second
-- PostgreSQL: Currently running queries
SELECT pid, now() - pg_stat_activity.query_start AS duration, query
FROM pg_stat_activity
WHERE state = 'active'
ORDER BY duration DESC;
-- Table sizes
SELECT
table_name,
pg_size_pretty(pg_total_relation_size(quote_ident(table_name)))
FROM information_schema.tables
WHERE table_schema = 'public'
ORDER BY pg_total_relation_size(quote_ident(table_name)) DESC;