高级查询
0x01 JOIN 连接查询
INNER JOIN(内连接)
只返回两个表中都匹配的行:
-- 基本内连接
SELECT
u.username,
o.order_id,
o.total_amount
FROM users u
INNER JOIN orders o ON u.id = o.user_id;
-- 多表连接
SELECT
u.username,
o.order_id,
p.product_name,
oi.quantity
FROM users u
INNER JOIN orders o ON u.id = o.user_id
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id;
LEFT JOIN(左连接)
返回左表所有行,右表不匹配的显示 NULL:
-- 查询所有用户及其订单(包括没有订单的用户)
SELECT
u.username,
o.order_id,
o.total_amount
FROM users u
LEFT JOIN orders o ON u.id = o.user_id;
-- 查找没有订单的用户
SELECT
u.username
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE o.id IS NULL;
RIGHT JOIN(右连接)
返回右表所有行,左表不匹配的显示 NULL:
-- 查询所有订单及其用户
SELECT
u.username,
o.order_id
FROM users u
RIGHT JOIN orders o ON u.id = o.user_id;
FULL JOIN(全连接)
返回两个表的所有行:
-- PostgreSQL 支持全连接
SELECT
u.username,
o.order_id
FROM users u
FULL JOIN orders o ON u.id = o.user_id;
-- MySQL 不支持 FULL JOIN,可用 UNION 模拟
SELECT u.username, o.order_id FROM users u LEFT JOIN orders o ON u.id = o.user_id
UNION
SELECT u.username, o.order_id FROM users u RIGHT JOIN orders o ON u.id = o.user_id;
CROSS JOIN(交叉连接)
返回两个表的笛卡尔积:
-- 交叉连接(所有组合)
SELECT
u.username,
p.product_name
FROM users u
CROSS JOIN products p;
SELF JOIN(自连接)
表与自身连接:
-- 查找员工及其经理
SELECT
e.name AS employee,
m.name AS manager
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id;
0x02 子查询
WHERE 子查询
-- 标量子查询
SELECT * FROM users WHERE age = (SELECT MAX(age) FROM users);
-- 列子查询
SELECT * FROM users WHERE id IN (SELECT user_id FROM orders);
-- NOT IN 子查询
SELECT * FROM users WHERE id NOT IN (SELECT user_id FROM orders);
-- EXISTS 子查询
SELECT * FROM users u
WHERE EXISTS (SELECT 1 FROM orders o WHERE o.user_id = u.id);
-- NOT EXISTS 子查询
SELECT * FROM users u
WHERE NOT EXISTS (SELECT 1 FROM orders o WHERE o.user_id = u.id);
FROM 子查询
-- 派生表
SELECT
avg_data.age_group,
avg_data.avg_amount
FROM (
SELECT
CASE
WHEN u.age < 25 THEN 'young'
WHEN u.age < 45 THEN 'middle'
ELSE 'senior'
END AS age_group,
AVG(o.total_amount) AS avg_amount
FROM users u
JOIN orders o ON u.id = o.user_id
GROUP BY age_group
) AS avg_data
WHERE avg_data.avg_amount > 100;
SELECT 子查询
-- 相关子查询
SELECT
u.username,
u.age,
(SELECT COUNT(*) FROM orders o WHERE o.user_id = u.id) AS order_count,
(SELECT MAX(o.total_amount) FROM orders o WHERE o.user_id = u.id) AS max_order
FROM users u;
0x03 聚合函数
常用聚合函数
-- COUNT: 计数
SELECT COUNT(*) FROM users; -- 总行数
SELECT COUNT(phone) FROM users; -- 非 NULL 行数
SELECT COUNT(DISTINCT country) FROM users; -- 不重复计数
-- SUM: 求和
SELECT SUM(total_amount) FROM orders;
-- AVG: 平均值
SELECT AVG(age) FROM users;
-- MAX: 最大值
SELECT MAX(age) FROM users;
SELECT MAX(created_at) FROM users; -- 最新日期
-- MIN: 最小值
SELECT MIN(age) FROM users;
SELECT MIN(created_at) FROM users; -- 最早日期
GROUP BY 分组
-- 按国家分组统计
SELECT
country,
COUNT(*) AS user_count,
AVG(age) AS avg_age
FROM users
GROUP BY country;
-- 多列分组
SELECT
country,
city,
COUNT(*) AS user_count
FROM users
GROUP BY country, city;
HAVING 过滤分组
-- HAVING 过滤分组结果
SELECT
country,
COUNT(*) AS user_count,
AVG(age) AS avg_age
FROM users
GROUP BY country
HAVING COUNT(*) > 100 AND AVG(age) > 25;
-- WHERE vs HAVING
-- WHERE: 分组前过滤(不能使用聚合函数)
-- HAVING: 分组后过滤(可以使用聚合函数)
SELECT
country,
COUNT(*) AS user_count
FROM users
WHERE age >= 18 -- 分组前过滤
GROUP BY country
HAVING COUNT(*) > 50; -- 分组后过滤
0x04 窗口函数
基本窗口函数
-- ROW_NUMBER: 行号
SELECT
username,
age,
ROW_NUMBER() OVER (ORDER BY age DESC) AS row_num
FROM users;
-- RANK: 排名(有并列)
SELECT
username,
age,
RANK() OVER (ORDER BY age DESC) AS rank
FROM users;
-- DENSE_RANK: 密集排名(无间隔)
SELECT
username,
age,
DENSE_RANK() OVER (ORDER BY age DESC) AS dense_rank
FROM users;
分区窗口函数
-- 按国家分组排名
SELECT
username,
country,
age,
ROW_NUMBER() OVER (PARTITION BY country ORDER BY age DESC) AS country_rank
FROM users;
-- 每个国家年龄最大的用户
WITH ranked_users AS (
SELECT
username,
country,
age,
ROW_NUMBER() OVER (PARTITION BY country ORDER BY age DESC) AS rn
FROM users
)
SELECT * FROM ranked_users WHERE rn = 1;
聚合窗口函数
-- 累计求和
SELECT
order_id,
order_date,
total_amount,
SUM(total_amount) OVER (ORDER BY order_date) AS running_total
FROM orders;
-- 移动平均
SELECT
order_id,
order_date,
total_amount,
AVG(total_amount) OVER (
ORDER BY order_date
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) AS moving_avg
FROM orders;
-- 分组聚合
SELECT
username,
country,
age,
AVG(age) OVER (PARTITION BY country) AS country_avg_age,
age - AVG(age) OVER (PARTITION BY country) AS age_diff
FROM users;
LAG 和 LEAD 函数
-- LAG: 获取前一行的值
SELECT
order_id,
order_date,
total_amount,
LAG(total_amount) OVER (ORDER BY order_date) AS prev_amount
FROM orders;
-- LEAD: 获取后一行的值
SELECT
order_id,
order_date,
total_amount,
LEAD(total_amount) OVER (ORDER BY order_date) AS next_amount
FROM orders;
-- 计算与前一天的差额
SELECT
order_id,
order_date,
total_amount,
total_amount - LAG(total_amount) OVER (ORDER BY order_date) AS amount_diff
FROM orders;
0x05 CTE 公用表表达式
基本 CTE
-- 非递归 CTE
WITH user_stats AS (
SELECT
user_id,
COUNT(*) AS order_count,
SUM(total_amount) AS total_spent
FROM orders
GROUP BY user_id
)
SELECT
u.username,
us.order_count,
us.total_spent
FROM users u
JOIN user_stats us ON u.id = us.user_id;
递归 CTE
-- 生成数字序列
WITH RECURSIVE numbers AS (
SELECT 1 AS n
UNION ALL
SELECT n + 1 FROM numbers WHERE n < 100
)
SELECT * FROM numbers;
-- 组织架构树
WITH RECURSIVE org_tree AS (
-- 基础查询:顶级管理者
SELECT id, name, manager_id, 1 AS level
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- 递归查询:下属员工
SELECT e.id, e.name, e.manager_id, ot.level + 1
FROM employees e
JOIN org_tree ot ON e.manager_id = ot.id
)
SELECT * FROM org_tree ORDER BY level, name;
0x06 高级查询技巧
PIVOT(行转列)
-- MySQL 使用 CASE 语句实现
SELECT
product_id,
SUM(CASE WHEN quarter = 'Q1' THEN sales END) AS Q1,
SUM(CASE WHEN quarter = 'Q2' THEN sales END) AS Q2,
SUM(CASE WHEN quarter = 'Q3' THEN sales END) AS Q3,
SUM(CASE WHEN quarter = 'Q4' THEN sales END) AS Q4
FROM sales
GROUP BY product_id;
-- PostgreSQL 使用 crosstab 函数
SELECT * FROM crosstab(
'SELECT product_id, quarter, sales FROM sales ORDER BY 1,2',
'SELECT DISTINCT quarter FROM sales ORDER BY 1'
) AS ct(product_id INT, Q1 INT, Q2 INT, Q3 INT, Q4 INT);
UNPIVOT(列转行)
-- 使用 UNION ALL 实现
SELECT product_id, 'Q1' AS quarter, Q1 AS sales FROM sales
UNION ALL
SELECT product_id, 'Q2', Q2 FROM sales
UNION ALL
SELECT product_id, 'Q3', Q3 FROM sales
UNION ALL
SELECT product_id, 'Q4', Q4 FROM sales;
字符串聚合
-- MySQL: GROUP_CONCAT
SELECT
department,
GROUP_CONCAT(employee_name ORDER BY employee_name SEPARATOR ', ') AS employees
FROM employees
GROUP BY department;
-- PostgreSQL: STRING_AGG
SELECT
department,
STRING_AGG(employee_name, ', ' ORDER BY employee_name) AS employees
FROM employees
GROUP BY department;
参考
目录