第五章:SQL 查询¶
基本 SQL¶
SELECT¶
-- 基本查询
SELECT * FROM events LIMIT 10;
-- 选择列
SELECT event_id, event_type, event_time FROM events;
-- 过滤
SELECT * FROM events WHERE event_type = 'click';
-- 排序
SELECT * FROM events ORDER BY event_time DESC LIMIT 100;
-- 分页
SELECT * FROM events LIMIT 100 OFFSET 200;
WHERE 条件¶
-- 基本条件
SELECT * FROM events WHERE event_type = 'click';
-- 多条件
SELECT * FROM events
WHERE event_type = 'click'
AND event_time > now() - INTERVAL 1 DAY;
-- IN 条件
SELECT * FROM events WHERE event_type IN ('click', 'purchase');
-- LIKE 条件
SELECT * FROM events WHERE event_type LIKE '%click%';
-- 正则匹配
SELECT * FROM events WHERE match(event_type, 'click|purchase');
聚合函数¶
基本聚合¶
-- COUNT
SELECT COUNT(*) FROM events;
SELECT COUNT(DISTINCT user_id) FROM events;
-- SUM
SELECT SUM(amount) FROM orders;
-- AVG
SELECT AVG(amount) FROM orders;
-- MIN/MAX
SELECT MIN(event_time), MAX(event_time) FROM events;
GROUP BY¶
-- 分组聚合
SELECT
event_type,
COUNT(*) as count
FROM events
GROUP BY event_type;
-- 多列分组
SELECT
toDate(event_time) as date,
event_type,
COUNT(*) as count
FROM events
GROUP BY date, event_type;
-- WITH ROLLUP
SELECT
toDate(event_time) as date,
event_type,
COUNT(*) as count
FROM events
GROUP BY date, event_type WITH ROLLUP;
-- WITH CUBE
SELECT
toDate(event_time) as date,
event_type,
COUNT(*) as count
FROM events
GROUP BY date, event_type WITH CUBE;
HAVING¶
高级聚合¶
uniq 系列¶
-- uniq: 近似去重计数
SELECT uniq(user_id) FROM events;
-- uniqExact: 精确去重计数
SELECT uniqExact(user_id) FROM events;
-- uniqCombined: 组合去重
SELECT uniqCombined(user_id) FROM events;
-- uniqHLL: HyperLogLog 去重
SELECT uniqHLL(user_id) FROM events;
topK¶
histogram¶
窗口函数¶
ROW_NUMBER¶
-- 行号
SELECT
user_id,
event_time,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY event_time) as row_num
FROM events;
RANK¶
LAG/LEAD¶
-- 上一行/下一行
SELECT
event_time,
amount,
LAG(amount) OVER (ORDER BY event_time) as prev_amount,
LEAD(amount) OVER (ORDER BY event_time) as next_amount
FROM orders;
聚合窗口¶
-- 累计求和
SELECT
event_time,
amount,
SUM(amount) OVER (ORDER BY event_time) as running_sum
FROM orders;
-- 移动平均
SELECT
event_time,
amount,
AVG(amount) OVER (
ORDER BY event_time
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) as moving_avg
FROM orders;
JOIN¶
INNER JOIN¶
LEFT JOIN¶
多表 JOIN¶
SELECT
e.event_id,
e.event_type,
u.name,
o.amount
FROM events e
LEFT JOIN users u ON e.user_id = u.user_id
LEFT JOIN orders o ON e.event_id = o.event_id;
JOIN 注意事项¶
-- ClickHouse JOIN 特点
-- 1. 不支持事务
-- 2. 大表 JOIN 小表性能好
-- 3. 建议使用字典代替 JOIN
-- 使用字典
SELECT
event_id,
dictGet('user_dict', 'name', user_id) as name
FROM events;
子查询¶
标量子查询¶
SELECT
event_id,
(SELECT COUNT(*) FROM events WHERE user_id = e.user_id) as user_event_count
FROM events e;
IN 子查询¶
WITH 子句¶
WITH
(SELECT COUNT(*) FROM events) as total_count
SELECT
event_type,
COUNT(*) as count,
COUNT(*) / total_count as ratio
FROM events
GROUP BY event_type;
小结¶
SQL 查询要点:
- 基本 SQL:SELECT、WHERE、ORDER BY
- 聚合函数:COUNT、SUM、AVG、GROUP BY
- 高级聚合:uniq、topK、histogram
- 窗口函数:ROW_NUMBER、RANK、LAG/LEAD
- JOIN:INNER JOIN、LEFT JOIN
下一章我们将学习数据导入导出。