SQL Patterns and Best Practices
Query Fundamentals
SELECT Best Practices
-- Bad: SELECT *
SELECT * FROM users;
-- Good: Explicit columns
SELECT id, name, email, created_at
FROM users
WHERE active = true;
Filtering
-- Use indexes effectively
SELECT * FROM orders
WHERE created_at >= '2024-01-01'
AND status = 'completed';
-- Avoid functions on indexed columns
-- Bad
WHERE YEAR(created_at) = 2024
-- Good
WHERE created_at >= '2024-01-01' AND created_at < '2025-01-01'
JOINs
JOIN Types
-- INNER JOIN: Only matching rows
SELECT u.name, o.total
FROM users u
INNER JOIN orders o ON u.id = o.user_id;
-- LEFT JOIN: All left rows + matching right
SELECT u.name, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;
-- Use table aliases consistently
Avoid N+1 Queries
-- Bad: Query per user for orders
-- Application loops and queries
-- Good: Single query with JOIN
SELECT u.id, u.name, o.id as order_id, o.total
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.active = true;
Aggregations
GROUP BY
SELECT
category,
COUNT(*) as product_count,
AVG(price) as avg_price,
SUM(quantity) as total_quantity
FROM products
GROUP BY category
HAVING COUNT(*) > 10;
Window Functions
-- Running totals
SELECT
date,
amount,
SUM(amount) OVER (ORDER BY date) as running_total
FROM transactions;
-- Rank within groups
SELECT
category,
name,
price,
RANK() OVER (PARTITION BY category ORDER BY price DESC) as price_rank
FROM products;
-- Previous/next values
SELECT
date,
amount,
LAG(amount) OVER (ORDER BY date) as prev_amount,
LEAD(amount) OVER (ORDER BY date) as next_amount
FROM transactions;
Indexing
When to Index
-- Primary keys (automatic)
-- Foreign keys
CREATE INDEX idx_orders_user_id ON orders(user_id);
-- Frequently filtered columns
CREATE INDEX idx_users_email ON users(email);
-- Composite indexes (order matters)
CREATE INDEX idx_orders_user_status ON orders(user_id, status);
Index Guidelines
- Index columns in WHERE, JOIN, ORDER BY
- Put equality conditions first in composite indexes
- Consider partial indexes for filtered queries
- Don't over-index (slows writes)
-- Partial index
CREATE INDEX idx_active_users ON users(email)
WHERE active = true;
-- Covering index
CREATE INDEX idx_orders_covering
ON orders(user_id, status)
INCLUDE (total, created_at);
Common Table Expressions (CTEs)
Readable Queries
WITH active_users AS (
SELECT id, name
FROM users
WHERE active = true
),
user_orders AS (
SELECT user_id, COUNT(*) as order_count
FROM orders
WHERE created_at >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY user_id
)
SELECT
u.name,
COALESCE(o.order_count, 0) as recent_orders
FROM active_users u
LEFT JOIN user_orders o ON u.id = o.user_id;
Recursive CTEs
-- Hierarchical data (org chart, categories)
WITH RECURSIVE category_tree AS (
-- Base case
SELECT id, name, parent_id, 0 as depth
FROM categories
WHERE parent_id IS NULL
UNION ALL
-- Recursive case
SELECT c.id, c.name, c.parent_id, ct.depth + 1
FROM categories c
INNER JOIN category_tree ct ON c.parent_id = ct.id
)
SELECT * FROM category_tree;
Transactions
ACID Compliance
BEGIN;
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;
-- Verify
SELECT id, balance FROM accounts WHERE id IN (1, 2);
COMMIT;
-- or ROLLBACK on error;
Isolation Levels
-- Read committed (default in PostgreSQL)
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
-- Serializable (strictest)
SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
Performance Optimization
EXPLAIN ANALYZE
EXPLAIN ANALYZE
SELECT u.name, COUNT(o.id)
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;
Common Optimizations
-- Pagination with OFFSET (slow for large offsets)
SELECT * FROM products ORDER BY id LIMIT 20 OFFSET 1000;
-- Keyset pagination (faster)
SELECT * FROM products
WHERE id > 1000
ORDER BY id
LIMIT 20;
-- EXISTS vs IN
-- EXISTS is often faster for subqueries
SELECT * FROM users u
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.user_id = u.id
AND o.total > 100
);
Schema Design
Normalization
-- Avoid data duplication
-- Use foreign keys for relationships
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
user_id INTEGER NOT NULL REFERENCES users(id),
status VARCHAR(20) NOT NULL DEFAULT 'pending',
created_at TIMESTAMP NOT NULL DEFAULT NOW()
);
Constraints
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(255) NOT NULL,
price DECIMAL(10,2) NOT NULL CHECK (price >= 0),
sku VARCHAR(50) UNIQUE NOT NULL,
category_id INTEGER REFERENCES categories(id)
);
Anti-Patterns to Avoid
SELECT * in production
- Missing indexes on foreign keys
- N+1 query patterns
- Large OFFSET pagination
- Storing arrays/JSON when relations fit better
- Missing NOT NULL constraints
- VARCHAR(255) by default without consideration
- Functions on indexed columns in WHERE