📊 SQL Mastery for Data Analysis
SQL is the universal language for data analysis and business intelligence. Master SQL to unlock the power of relational databases and become proficient in data querying, manipulation, and transformation.
Table of Contents
🎯 SQL Fundamentals
Basic SELECT Statement
-- Simple query
SELECT name, salary, department
FROM employees
WHERE salary > 50000
ORDER BY salary DESC
LIMIT 10;
-- Select all columns
SELECT * FROM employees;
-- Select with alias
SELECT
emp_id AS employee_id,
CONCAT(first_name, ' ', last_name) AS full_name,
salary * 12 AS annual_salary
FROM employees;
WHERE Clause Operators
-- Comparison operators
SELECT * FROM employees WHERE age > 30;
SELECT * FROM employees WHERE salary != 50000;
SELECT * FROM employees WHERE hire_date >= '2020-01-01';
-- Logical operators
SELECT * FROM employees
WHERE department = 'Sales' AND salary > 60000;
SELECT * FROM employees
WHERE department IN ('Sales', 'Marketing', 'IT');
SELECT * FROM employees
WHERE name LIKE '%Smith%';
Basic Aggregation
-- Count rows
SELECT COUNT(*) AS total_employees FROM employees;
-- Aggregate functions
SELECT
COUNT(*) AS employee_count,
AVG(salary) AS avg_salary,
MIN(salary) AS min_salary,
MAX(salary) AS max_salary,
SUM(salary) AS total_salary
FROM employees;
-- Group By
SELECT
department,
COUNT(*) AS emp_count,
AVG(salary) AS avg_salary
FROM employees
GROUP BY department
HAVING COUNT(*) > 5
ORDER BY avg_salary DESC;
🔍 Querying Data
Joins
-- INNER JOIN - only matching records
SELECT
e.name,
e.salary,
d.dept_name
FROM employees e
INNER JOIN departments d ON e.dept_id = d.dept_id;
-- LEFT JOIN - all from left table
SELECT
e.name,
d.dept_name
FROM employees e
LEFT JOIN departments d ON e.dept_id = d.dept_id;
-- Multiple Joins
SELECT
e.name,
d.dept_name,
p.project_name
FROM employees e
INNER JOIN departments d ON e.dept_id = d.dept_id
INNER JOIN projects p ON e.emp_id = p.assigned_to;
Subqueries
-- Subquery in WHERE
SELECT name, salary FROM employees
WHERE salary > (SELECT AVG(salary) FROM employees);
-- Subquery in FROM
SELECT department, avg_sal FROM (
SELECT department, AVG(salary) AS avg_sal
FROM employees
GROUP BY department
) dept_avg
WHERE avg_sal > 50000;
-- Correlated subquery
SELECT e.name, e.salary
FROM employees e
WHERE salary > (
SELECT AVG(salary)
FROM employees
WHERE department = e.department
);
Window Functions
-- ROW_NUMBER
SELECT
name,
salary,
ROW_NUMBER() OVER (ORDER BY salary DESC) AS sal_rank
FROM employees;
-- Partition By
SELECT
name,
department,
salary,
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS dept_rank
FROM employees;
-- Running total
SELECT
date,
revenue,
SUM(revenue) OVER (ORDER BY date) AS running_total
FROM sales
ORDER BY date;
🚀 Advanced Concepts
Common Table Expressions (CTE)
-- Single CTE
WITH high_earners AS (
SELECT emp_id, name, salary
FROM employees
WHERE salary > 80000
)
SELECT * FROM high_earners;
-- Multiple CTEs
WITH dept_avg AS (
SELECT department, AVG(salary) AS avg_salary
FROM employees
GROUP BY department
),
high_earners AS (
SELECT e.name, e.salary, d.avg_salary
FROM employees e
INNER JOIN dept_avg d ON e.department = d.department
WHERE e.salary > d.avg_salary
)
SELECT * FROM high_earners;
Union and Combine Results
-- UNION removes duplicates
SELECT name FROM employees
UNION
SELECT name FROM contractors;
-- UNION ALL keeps duplicates
SELECT name FROM employees
UNION ALL
SELECT name FROM contractors;
-- INTERSECT - common records
SELECT name FROM employees
INTERSECT
SELECT name FROM contractors;
Conditional Logic
-- CASE statement
SELECT
name,
salary,
CASE
WHEN salary < 40000 THEN 'Junior'
WHEN salary < 70000 THEN 'Mid'
WHEN salary < 100000 THEN 'Senior'
ELSE 'Executive'
END AS salary_level
FROM employees;
-- Multiple conditions
SELECT
name,
department,
CASE
WHEN department = 'Sales' AND salary > 60000 THEN 'High Performer'
WHEN department = 'Sales' THEN 'Sales'
ELSE 'Other'
END AS classification
FROM employees;
⚡ Optimization
Indexing
-- Create index
CREATE INDEX idx_emp_dept ON employees(department);
-- Multi-column index
CREATE INDEX idx_emp_dept_salary ON employees(department, salary);
-- Drop index
DROP INDEX idx_emp_dept;
Query Performance
-- Use EXPLAIN to analyze query plan
EXPLAIN SELECT * FROM employees WHERE department = 'Sales';
-- Avoid SELECT *
SELECT emp_id, name, salary FROM employees; -- Better
-- Use appropriate data types
CREATE TABLE users (
user_id INT PRIMARY KEY,
name VARCHAR(100),
email VARCHAR(255)
);
Aggregation Optimization
-- Inefficient: Subquery in SELECT
SELECT
name,
(SELECT COUNT(*) FROM orders WHERE emp_id = e.emp_id) AS order_count
FROM employees e;
-- Better: Use Join with aggregation
SELECT
e.name,
COUNT(o.order_id) AS order_count
FROM employees e
LEFT JOIN orders o ON e.emp_id = o.emp_id
GROUP BY e.emp_id, e.name;
✅ Best Practices
1. Always use column aliases for clarity
SELECT
emp_id AS employee_id,
concat(first_name, ' ', last_name) AS full_name
FROM employees;
2. Format queries for readability
-- Good formatting
SELECT
e.name,
d.department_name,
COUNT(o.order_id) AS order_count
FROM employees e
LEFT JOIN departments d ON e.dept_id = d.dept_id
LEFT JOIN orders o ON e.emp_id = o.emp_id
GROUP BY e.emp_id, e.name, d.department_name
ORDER BY order_count DESC;
3. Use parameterized queries to prevent SQL injection
-- Poor (vulnerable)
SELECT * FROM users WHERE email = '" + userInput + "'
-- Better (use parameters)
SELECT * FROM users WHERE email = ? -- Parameter binding
4. Understand NULL handling
-- NULL comparisons
SELECT * FROM users WHERE phone IS NULL;
SELECT * FROM users WHERE phone IS NOT NULL;
-- NULL in aggregations
SELECT COUNT(phone) FROM users; -- Excludes NULLs
SELECT COUNT(*) FROM users; -- Includes NULLs
5. Document complex queries
-- Calculate monthly sales by region
-- Excludes cancelled orders and adjusted for returns
SELECT
region,
DATE_TRUNC('month', order_date) AS month,
SUM(total_amount) AS monthly_sales
FROM orders
WHERE status != 'CANCELLED' AND return_status IS NULL
GROUP BY region, DATE_TRUNC('month', order_date)
ORDER BY month DESC;
📚 Resources
- SQL Documentation: Database-specific (MySQL, PostgreSQL, SQL Server)
- LeetCode: SQL practice problems
- Mode Analytics: SQL Tutorial & Editor
- Use EXPLAIN: Analyze and optimize your queries
Last updated: April 12, 2026
Difficulty: Beginner to Advanced
Prerequisites: Basic database concepts