SQL Practical Cheatsheet — Complete Reference for Everyday Commands

Basic CRUD
JOIN
GROUP BY + Aggregate Functions
Subquery
CTE (Common Table Expression) — The King of Readability
Window Functions
Index Strategy
Execution Plan (EXPLAIN)
Practical Pattern Collection

Basic CRUD

SELECT (Query)

-- Basic query
SELECT * FROM users WHERE age >= 20 ORDER BY created_at DESC LIMIT 10;

-- Specific columns only
SELECT id, name, email FROM users WHERE status = 'active';

-- Alias
SELECT
    u.name AS user_name,
    COUNT(o.id) AS order_count,
    SUM(o.amount) AS total_spent
FROM users u
JOIN orders o ON u.id = o.user_id
GROUP BY u.name;

-- DISTINCT (remove duplicates)
SELECT DISTINCT department FROM employees;

-- BETWEEN, IN, LIKE
SELECT * FROM products
WHERE price BETWEEN 10000 AND 50000
  AND category IN ('electronics', 'books')
  AND name LIKE '%Galaxy%';

-- NULL handling
SELECT name, COALESCE(phone, 'Not registered') AS phone
FROM users
WHERE email IS NOT NULL;

INSERT

-- Single insert
INSERT INTO users (name, email, age) VALUES ('Kim Youngju', 'yj@example.com', 30);

-- Multiple inserts
INSERT INTO users (name, email, age) VALUES
    ('Hong Gildong', 'hong@example.com', 25),
    ('Lee Sunsin', 'lee@example.com', 35),
    ('King Sejong', 'sejong@example.com', 45);

-- INSERT from SELECT (table copy)
INSERT INTO users_backup (name, email, age)
SELECT name, email, age FROM users WHERE status = 'active';

-- UPSERT (UPDATE if exists, INSERT if not)
-- PostgreSQL
INSERT INTO users (email, name, login_count)
VALUES ('yj@example.com', 'Kim Youngju', 1)
ON CONFLICT (email)
DO UPDATE SET
    login_count = users.login_count + 1,
    last_login = NOW();

-- MySQL
INSERT INTO users (email, name, login_count)
VALUES ('yj@example.com', 'Kim Youngju', 1)
ON DUPLICATE KEY UPDATE
    login_count = login_count + 1,
    last_login = NOW();

UPDATE

-- Basic update
UPDATE users SET status = 'inactive' WHERE last_login < '2025-01-01';

-- Update multiple columns at once
UPDATE products
SET price = price * 1.1,           -- 10% increase
    updated_at = NOW()
WHERE category = 'electronics';

-- JOIN UPDATE (update referencing another table)
-- PostgreSQL
UPDATE orders o
SET status = 'cancelled'
FROM users u
WHERE o.user_id = u.id
  AND u.status = 'banned';

-- MySQL
UPDATE orders o
JOIN users u ON o.user_id = u.id
SET o.status = 'cancelled'
WHERE u.status = 'banned';

-- Conditional UPDATE with CASE
UPDATE employees
SET salary = CASE
    WHEN department = 'engineering' THEN salary * 1.15
    WHEN department = 'sales' THEN salary * 1.10
    ELSE salary * 1.05
END
WHERE hire_date < '2024-01-01';

-- Warning: UPDATE without WHERE modifies ALL rows!
-- Always verify with SELECT first!
SELECT * FROM users WHERE last_login < '2025-01-01';  -- Verify first
UPDATE users SET status = 'inactive' WHERE last_login < '2025-01-01';

DELETE

-- Basic delete
DELETE FROM sessions WHERE expired_at < NOW();

-- JOIN DELETE
-- PostgreSQL
DELETE FROM orders
USING users
WHERE orders.user_id = users.id AND users.status = 'deleted';

-- MySQL
DELETE o FROM orders o
JOIN users u ON o.user_id = u.id
WHERE u.status = 'deleted';

-- TRUNCATE (delete all, fast, resets AUTO_INCREMENT)
TRUNCATE TABLE logs;

-- Soft delete pattern (recommended)
UPDATE users SET deleted_at = NOW() WHERE id = 123;
-- When querying:
SELECT * FROM users WHERE deleted_at IS NULL;

JOIN

-- INNER JOIN (only rows that exist in both)
SELECT u.name, o.amount
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- LEFT JOIN (all from left + matching from right)
SELECT u.name, COALESCE(COUNT(o.id), 0) AS order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.name;
-- → Includes users with no orders (order_count = 0)

-- RIGHT JOIN (all from right + matching from left)
-- Rarely used, flipping LEFT JOIN is more readable

-- FULL OUTER JOIN (all from both sides)
SELECT u.name, o.amount
FROM users u
FULL OUTER JOIN orders o ON u.id = o.user_id;

-- CROSS JOIN (all combinations, Cartesian product)
SELECT s.size, c.color
FROM sizes s CROSS JOIN colors c;
-- 3 sizes x 4 colors = 12 combinations

-- SELF JOIN (join with itself)
SELECT e.name AS employee, m.name AS manager
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id;

JOIN Diagram:

INNER JOIN:    A intersection B (intersection only)
LEFT JOIN:     A + (A intersection B)
RIGHT JOIN:    (A intersection B) + B
FULL OUTER:    A union B (union)

GROUP BY + Aggregate Functions

-- Basic aggregation
SELECT
    department,
    COUNT(*) AS emp_count,
    AVG(salary) AS avg_salary,
    MAX(salary) AS max_salary,
    MIN(salary) AS min_salary,
    SUM(salary) AS total_salary
FROM employees
GROUP BY department
HAVING AVG(salary) > 50000000  -- Only departments with average salary above 50M
ORDER BY avg_salary DESC;

-- ROLLUP (subtotals + grand total)
SELECT
    COALESCE(department, '=== Total ===') AS department,
    COALESCE(position, '--- Subtotal ---') AS position,
    COUNT(*) AS count,
    AVG(salary) AS avg_salary
FROM employees
GROUP BY ROLLUP(department, position);

Subquery

-- WHERE subquery
SELECT * FROM users
WHERE id IN (
    SELECT user_id FROM orders
    WHERE amount > 1000000
);

-- FROM subquery (inline view)
SELECT dept_name, avg_salary
FROM (
    SELECT department AS dept_name, AVG(salary) AS avg_salary
    FROM employees
    GROUP BY department
) sub
WHERE avg_salary > 60000000;

-- EXISTS (check existence, faster than IN for large datasets)
SELECT u.name
FROM users u
WHERE EXISTS (
    SELECT 1 FROM orders o
    WHERE o.user_id = u.id AND o.status = 'completed'
);

-- Scalar subquery (in SELECT clause)
SELECT
    name,
    salary,
    salary - (SELECT AVG(salary) FROM employees) AS diff_from_avg
FROM employees;

CTE (Common Table Expression) — The King of Readability

-- Basic CTE
WITH active_users AS (
    SELECT id, name, email
    FROM users
    WHERE status = 'active' AND last_login > NOW() - INTERVAL '30 days'
),
user_orders AS (
    SELECT user_id, COUNT(*) AS order_count, SUM(amount) AS total
    FROM orders
    WHERE created_at > NOW() - INTERVAL '30 days'
    GROUP BY user_id
)
SELECT
    au.name,
    au.email,
    COALESCE(uo.order_count, 0) AS orders,
    COALESCE(uo.total, 0) AS total_spent
FROM active_users au
LEFT JOIN user_orders uo ON au.id = uo.user_id
ORDER BY total_spent DESC;

-- Recursive CTE (org charts, category trees)
WITH RECURSIVE org_tree AS (
    -- Base case: CEO (manager_id is NULL)
    SELECT id, name, manager_id, 1 AS level
    FROM employees WHERE manager_id IS NULL

    UNION ALL

    -- Recursive case: traverse subordinates
    SELECT e.id, e.name, e.manager_id, ot.level + 1
    FROM employees e
    JOIN org_tree ot ON e.manager_id = ot.id
)
SELECT REPEAT('  ', level - 1) || name AS org_chart, level
FROM org_tree
ORDER BY level, name;

Window Functions

-- ROW_NUMBER (sequential numbering)
SELECT
    name, department, salary,
    ROW_NUMBER() OVER (ORDER BY salary DESC) AS rank_all,
    ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS rank_dept
FROM employees;

-- RANK vs DENSE_RANK
-- RANK: 1, 2, 2, 4 (ties at 2nd, then 4th)
-- DENSE_RANK: 1, 2, 2, 3 (ties at 2nd, then 3rd)

-- LAG / LEAD (reference previous/next row)
SELECT
    date,
    revenue,
    LAG(revenue) OVER (ORDER BY date) AS prev_day,
    revenue - LAG(revenue) OVER (ORDER BY date) AS daily_change,
    ROUND(
        (revenue - LAG(revenue) OVER (ORDER BY date))
        / LAG(revenue) OVER (ORDER BY date) * 100, 1
    ) AS change_pct
FROM daily_sales;

-- Running Total
SELECT
    date, amount,
    SUM(amount) OVER (ORDER BY date) AS running_total,
    AVG(amount) OVER (ORDER BY date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS moving_avg_7d
FROM daily_sales;

-- NTILE (divide into quantiles)
SELECT
    name, salary,
    NTILE(4) OVER (ORDER BY salary DESC) AS quartile
    -- 1=top 25%, 2=25~50%, 3=50~75%, 4=bottom 25%
FROM employees;

Index Strategy

-- Create index
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_orders_user_date ON orders(user_id, created_at DESC);
CREATE UNIQUE INDEX idx_users_email_unique ON users(email);

-- Composite index order matters!
-- idx(a, b, c):
--   WHERE a = 1                    Used
--   WHERE a = 1 AND b = 2         Used
--   WHERE a = 1 AND b = 2 AND c = 3 Used
--   WHERE b = 2                    NOT used! (leading column missing)
--   WHERE a = 1 AND c = 3         Partially used (a only, b skipped)

-- Partial index (PostgreSQL)
CREATE INDEX idx_active_users ON users(email) WHERE status = 'active';

-- Covering index (query resolved from index alone, no table access)
CREATE INDEX idx_covering ON orders(user_id, status, amount);
SELECT status, SUM(amount) FROM orders WHERE user_id = 123 GROUP BY status;
-- → Only reads from index! (no table I/O)

Execution Plan (EXPLAIN)

-- PostgreSQL
EXPLAIN ANALYZE
SELECT u.name, COUNT(o.id)
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.status = 'active'
GROUP BY u.name;

-- How to read:
-- Seq Scan: Full table scan (need an index?)
-- Index Scan: Index used
-- Index Only Scan: Covering index
-- Nested Loop: JOIN for small datasets
-- Hash Join: JOIN for large datasets
-- Sort: ORDER BY (disk if memory exceeded)
-- Bitmap Heap Scan: Combining multiple indexes

-- MySQL
EXPLAIN FORMAT=JSON
SELECT * FROM orders WHERE user_id = 123 AND status = 'completed';

Practical Pattern Collection

Pagination

-- OFFSET method (simple but slow for large datasets)
SELECT * FROM posts ORDER BY id DESC LIMIT 20 OFFSET 40;

-- Cursor-based (recommended for large datasets!)
SELECT * FROM posts
WHERE id < 12345  -- Last seen id
ORDER BY id DESC
LIMIT 20;

Deduplication

-- Find duplicate rows
SELECT email, COUNT(*) as cnt
FROM users
GROUP BY email
HAVING COUNT(*) > 1;

-- Delete duplicates, keep only the oldest
DELETE FROM users
WHERE id NOT IN (
    SELECT MIN(id) FROM users GROUP BY email
);

-- Or using CTE
WITH ranked AS (
    SELECT id, ROW_NUMBER() OVER (PARTITION BY email ORDER BY created_at DESC) AS rn
    FROM users
)
DELETE FROM users WHERE id IN (SELECT id FROM ranked WHERE rn > 1);

-- Today / this month / this year
SELECT * FROM orders WHERE created_at::date = CURRENT_DATE;
SELECT * FROM orders WHERE DATE_TRUNC('month', created_at) = DATE_TRUNC('month', NOW());

-- Daily stats for last 7 days
SELECT
    DATE(created_at) AS date,
    COUNT(*) AS orders,
    SUM(amount) AS revenue
FROM orders
WHERE created_at >= NOW() - INTERVAL '7 days'
GROUP BY DATE(created_at)
ORDER BY date;

-- Hourly distribution
SELECT
    EXTRACT(HOUR FROM created_at) AS hour,
    COUNT(*) AS count
FROM orders
GROUP BY hour
ORDER BY hour;

Lock Considerations

-- SELECT FOR UPDATE (pessimistic locking)
BEGIN;
SELECT * FROM products WHERE id = 1 FOR UPDATE;  -- Other transactions wait
UPDATE products SET stock = stock - 1 WHERE id = 1;
COMMIT;

-- Optimistic locking (version column)
UPDATE products
SET stock = stock - 1, version = version + 1
WHERE id = 1 AND version = 5;  -- Only updates if version matches
-- Affected rows = 0 means someone else updated first!

Quiz — Practical SQL (Click to check!)

Q1. What is the difference between LEFT JOIN and INNER JOIN? ||LEFT JOIN: Includes all rows from the left table; NULL if no match on right. INNER JOIN: Returns only rows that match in both tables.||

Q2. How do you write UPSERT in PostgreSQL and MySQL respectively? ||PostgreSQL: INSERT ... ON CONFLICT (key) DO UPDATE SET ... MySQL: INSERT ... ON DUPLICATE KEY UPDATE ...||

Q3. What happens with composite index idx(a, b, c) when you use only WHERE b = 2? ||The index is not used. Composite indexes are used from left to right. It does not work without the leading column (a).||

Q4. What is the difference between ROW_NUMBER and DENSE_RANK? ||ROW_NUMBER: Always sequential numbers (no ties). DENSE_RANK: Same rank for ties, next rank is the immediate next number (1,2,2,3). RANK skips after ties (1,2,2,4).||

Q5. Why is OFFSET pagination slow for large datasets? ||OFFSET N reads and discards N rows. OFFSET 1,000,000 reads 1 million rows before returning results. Cursor-based pagination uses an index to jump directly to the starting point.||

Q6. What is a covering index? ||An index that contains all columns needed by the query, so results can be returned from the index alone without accessing the table. This is called Index Only Scan.||

Q7. What is SELECT FOR UPDATE used for and what should you watch out for? ||Pessimistic locking — locks the selected rows so other transactions cannot modify them. Caution: If the transaction takes too long, other transactions will wait, risking deadlock.||

Q8. What happens if you run UPDATE without WHERE? ||All rows in the table are modified! Always verify the target with SELECT before running UPDATE. In production, wrap in a transaction and COMMIT after verification.||