第四章:数据类型与表设计¶
PostgreSQL 数据类型详解¶
数值类型的高级用法¶
序列类型(SERIAL)¶
-- SERIAL 类型实际上是整数类型加上序列
CREATE TABLE users (
id SERIAL PRIMARY KEY, -- 自动创建序列 users_id_seq
username VARCHAR(50)
);
-- 手动创建序列
CREATE SEQUENCE custom_seq START 1000 INCREMENT 5;
CREATE TABLE products (
id INTEGER DEFAULT nextval('custom_seq') PRIMARY KEY,
name VARCHAR(100)
);
-- 序列函数
SELECT currval('users_id_seq'); -- 当前值
SELECT nextval('users_id_seq'); -- 下一个值
SELECT setval('users_id_seq', 1000); -- 设置值
货币类型(MONEY)¶
CREATE TABLE transactions (
id SERIAL PRIMARY KEY,
amount MONEY NOT NULL,
currency VARCHAR(3) DEFAULT 'USD'
);
-- 插入货币数据
INSERT INTO transactions (amount) VALUES ('$100.50');
INSERT INTO transactions (amount) VALUES (150.75);
-- 货币运算
SELECT amount * 0.9 as discounted_amount FROM transactions;
字符类型的编码和排序¶
-- 指定字符集和排序规则
CREATE TABLE multilingual (
id SERIAL PRIMARY KEY,
content TEXT COLLATE "en_US.utf8",
chinese_content TEXT COLLATE "zh_CN.utf8"
);
-- 常用字符串函数
SELECT
username,
LENGTH(username) as length,
OCTET_LENGTH(username) as bytes, -- UTF-8 编码下的字节数
CHAR_LENGTH(username) as chars -- 字符数
FROM users;
-- 正则表达式匹配
SELECT * FROM users WHERE username ~ '^[a-zA-Z]';
SELECT * FROM users WHERE username ~* '^john'; -- 不区分大小写
日期时间类型的时区处理¶
-- 时区相关类型
CREATE TABLE events (
id SERIAL PRIMARY KEY,
event_time TIMESTAMP, -- 无时区
event_time_tz TIMESTAMPTZ, -- 带时区
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
);
-- 插入带时区的时间
INSERT INTO events (event_time_tz)
VALUES ('2024-01-01 12:00:00+08'); -- 东八区时间
-- 时区转换
SELECT
event_time_tz AT TIME ZONE 'UTC' as utc_time,
event_time_tz AT TIME ZONE 'America/New_York' as ny_time
FROM events;
-- 时间间隔计算
SELECT
event_time_tz + INTERVAL '1 day' as next_day,
event_time_tz - INTERVAL '2 hours' as two_hours_ago,
AGE(CURRENT_TIMESTAMP, event_time_tz) as time_passed
FROM events;
数组类型¶
-- 创建数组列
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
tags TEXT[], -- 文本数组
prices DECIMAL(10,2)[], -- 数值数组
dimensions INTEGER[3] -- 固定长度数组
);
-- 插入数组数据
INSERT INTO products (name, tags, prices, dimensions) VALUES
('Laptop', '{"electronics", "computer", "portable"}', '{999.99, 899.99}', '{35, 25, 2}');
-- 查询数组
SELECT * FROM products WHERE 'electronics' = ANY(tags);
SELECT * FROM products WHERE tags @> '{"computer"}'; -- 包含
SELECT * FROM products WHERE tags && '{"electronics", "gadget"}'; -- 重叠
-- 数组函数
SELECT
name,
array_length(tags, 1) as tag_count,
unnest(tags) as individual_tag, -- 展开数组
array_append(tags, 'new_tag') as new_tags
FROM products;
JSON/JSONB 类型¶
-- JSON 和 JSONB 的区别
CREATE TABLE documents (
id SERIAL PRIMARY KEY,
metadata JSON, -- 存储为文本,保留空格和键顺序
data JSONB -- 二进制存储,索引友好
);
-- 插入 JSON 数据
INSERT INTO documents (metadata, data) VALUES (
'{"name": "John", "age": 30, "hobbies": ["reading", "swimming"]}',
'{"name": "Jane", "age": 25, "address": {"city": "NYC", "zip": "10001"}}'
);
-- JSON 查询操作符
SELECT
data->>'name' as name, -- 获取文本值
data->'address'->>'city' as city, -- 嵌套访问
data @> '{"age": 25}' as is_age_25, -- 包含
data ? 'address' as has_address -- 包含键
FROM documents;
-- JSON 函数
SELECT
jsonb_pretty(data) as formatted_json,
jsonb_array_length(data->'hobbies') as hobby_count,
jsonb_set(data, '{address,city}', '"Boston"') as updated_data
FROM documents;
-- 创建 JSONB 索引
CREATE INDEX idx_documents_data ON documents USING GIN (data);
网络地址类型¶
-- 网络地址类型
CREATE TABLE servers (
id SERIAL PRIMARY KEY,
ip_address INET, -- IPv4 或 IPv6
mac_address MACADDR, -- MAC 地址
cidr_range CIDR -- 网络地址段
);
-- 插入网络地址
INSERT INTO servers (ip_address, mac_address, cidr_range) VALUES
('192.168.1.100', '08:00:2b:01:02:03', '192.168.1.0/24');
-- 网络地址操作
SELECT
ip_address,
family(ip_address) as ip_family, -- 4 或 6
host(ip_address) as host_part,
masklen(cidr_range) as mask_length
FROM servers;
-- 检查 IP 是否在网段内
SELECT * FROM servers WHERE '192.168.1.50' <<= cidr_range;
几何类型(需要 PostGIS 扩展)¶
-- 安装 PostGIS 扩展
CREATE EXTENSION postgis;
-- 几何类型表
CREATE TABLE locations (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
point GEOMETRY(POINT, 4326), -- WGS84 坐标
line GEOMETRY(LINESTRING, 4326),
polygon GEOMETRY(POLYGON, 4326)
);
-- 插入几何数据
INSERT INTO locations (name, point) VALUES
('Beijing', ST_GeomFromText('POINT(116.4074 39.9042)', 4326)),
('Shanghai', ST_GeomFromText('POINT(121.4737 31.2304)', 4326));
-- 几何计算
SELECT
name,
ST_Distance(point, ST_GeomFromText('POINT(120.0 30.0)', 4326)) as distance,
ST_AsText(point) as wkt_format
FROM locations;
表设计最佳实践¶
1. 规范化设计¶
第一范式(1NF)¶
- 每个列都是原子的(不可再分)
- 没有重复的列组
-- 不符合 1NF
CREATE TABLE bad_design (
id SERIAL PRIMARY KEY,
phone_numbers TEXT -- 存储多个电话号码,用逗号分隔
);
-- 符合 1NF
CREATE TABLE good_design (
id SERIAL PRIMARY KEY,
user_id INTEGER REFERENCES users(id),
phone_number VARCHAR(20)
);
第二范式(2NF)¶
- 满足 1NF
- 所有非主键列完全依赖于主键
-- 不符合 2NF
CREATE TABLE orders_bad (
order_id SERIAL PRIMARY KEY,
product_id INTEGER,
product_name VARCHAR(100), -- 部分依赖于主键
quantity INTEGER
);
-- 符合 2NF
CREATE TABLE orders (
order_id SERIAL PRIMARY KEY,
user_id INTEGER REFERENCES users(id),
order_date DATE
);
CREATE TABLE order_items (
id SERIAL PRIMARY KEY,
order_id INTEGER REFERENCES orders(order_id),
product_id INTEGER REFERENCES products(id),
quantity INTEGER
);
第三范式(3NF)¶
- 满足 2NF
- 没有传递依赖
-- 不符合 3NF
CREATE TABLE employees_bad (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
department_id INTEGER,
department_name VARCHAR(100) -- 传递依赖
);
-- 符合 3NF
CREATE TABLE employees (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
department_id INTEGER REFERENCES departments(id)
);
CREATE TABLE departments (
id SERIAL PRIMARY KEY,
name VARCHAR(100)
);
2. 主键设计¶
-- 自然主键(不推荐)
CREATE TABLE countries (
country_code CHAR(2) PRIMARY KEY, -- 如 'US', 'CN'
name VARCHAR(100)
);
-- 代理主键(推荐)
CREATE TABLE users (
id SERIAL PRIMARY KEY, -- 代理主键
email VARCHAR(100) UNIQUE NOT NULL, -- 业务键
username VARCHAR(50) UNIQUE
);
-- 复合主键
CREATE TABLE enrollments (
student_id INTEGER REFERENCES students(id),
course_id INTEGER REFERENCES courses(id),
enrollment_date DATE DEFAULT CURRENT_DATE,
PRIMARY KEY (student_id, course_id)
);
3. 外键约束¶
-- 基本外键
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
user_id INTEGER REFERENCES users(id), -- 默认 RESTRICT
total_amount DECIMAL(10,2)
);
-- 带操作的外键
CREATE TABLE order_items (
id SERIAL PRIMARY KEY,
order_id INTEGER REFERENCES orders(id)
ON DELETE CASCADE -- 级联删除
ON UPDATE CASCADE, -- 级联更新
product_id INTEGER REFERENCES products(id)
ON DELETE SET NULL -- 设置为 NULL
ON UPDATE CASCADE,
quantity INTEGER
);
-- 延迟约束
CREATE TABLE invoices (
id SERIAL PRIMARY KEY,
order_id INTEGER REFERENCES orders(id)
DEFERRABLE INITIALLY DEFERRED, -- 延迟到事务结束检查
amount DECIMAL(10,2)
);
4. 检查约束¶
-- 简单检查约束
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
price DECIMAL(10,2) CHECK (price > 0),
stock_quantity INTEGER CHECK (stock_quantity >= 0),
discount_price DECIMAL(10,2),
CONSTRAINT valid_discount CHECK (discount_price IS NULL OR discount_price < price)
);
-- 表级检查约束
CREATE TABLE employees (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
salary DECIMAL(10,2),
manager_id INTEGER REFERENCES employees(id),
CONSTRAINT salary_check CHECK (salary >= 0),
CONSTRAINT manager_check CHECK (manager_id != id OR manager_id IS NULL)
);
5. 默认值和生成列¶
-- 默认值
CREATE TABLE users (
id SERIAL PRIMARY KEY,
username VARCHAR(50) UNIQUE NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
is_active BOOLEAN DEFAULT TRUE,
login_count INTEGER DEFAULT 0
);
-- 生成列(PostgreSQL 12+)
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
price DECIMAL(10,2),
tax_rate DECIMAL(5,4) DEFAULT 0.1,
price_with_tax DECIMAL(10,2) GENERATED ALWAYS AS (price * (1 + tax_rate)) STORED
);
-- 使用触发器维护更新时间
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ language 'plpgsql';
CREATE TRIGGER update_users_updated_at BEFORE UPDATE ON users
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
6. 表分区¶
-- 创建主表
CREATE TABLE sales (
id SERIAL,
sale_date DATE NOT NULL,
product_id INTEGER,
amount DECIMAL(10,2),
region VARCHAR(50)
) PARTITION BY RANGE (sale_date);
-- 创建分区表
CREATE TABLE sales_2024_q1 PARTITION OF sales
FOR VALUES FROM ('2024-01-01') TO ('2024-04-01');
CREATE TABLE sales_2024_q2 PARTITION OF sales
FOR VALUES FROM ('2024-04-01') TO ('2024-07-01');
-- 列表分区
CREATE TABLE sales_by_region (
id SERIAL,
sale_date DATE,
region VARCHAR(50),
amount DECIMAL(10,2)
) PARTITION BY LIST (region);
CREATE TABLE sales_north PARTITION OF sales_by_region
FOR VALUES IN ('North', 'Northeast');
CREATE TABLE sales_south PARTITION OF sales_by_region
FOR VALUES IN ('South', 'Southeast');
7. 继承表¶
-- 父表
CREATE TABLE vehicles (
id SERIAL PRIMARY KEY,
brand VARCHAR(50),
model VARCHAR(50),
year INTEGER
);
-- 子表继承父表
CREATE TABLE cars (
num_doors INTEGER,
fuel_type VARCHAR(20)
) INHERITS (vehicles);
CREATE TABLE trucks (
load_capacity DECIMAL(10,2),
num_axles INTEGER
) INHERITS (vehicles);
-- 查询所有车辆
SELECT * FROM vehicles; -- 包括所有子表数据
SELECT * FROM ONLY vehicles; -- 仅父表数据
数据类型选择建议¶
1. 数值类型选择¶
- 整数:使用
INTEGER或BIGINT,避免SMALLINT - 小数:使用
DECIMAL/NUMERIC处理精确计算 - 自增:使用
SERIAL或BIGSERIAL
2. 字符串类型选择¶
- 短文本:
VARCHAR(n),n 根据实际需要设置 - 长文本:
TEXT,性能与VARCHAR相当 - 固定长度:
CHAR(n)仅当长度绝对固定时使用
3. 时间类型选择¶
- 仅日期:
DATE - 日期时间:
TIMESTAMPTZ(推荐,带时区) - 时间间隔:
INTERVAL
4. 特殊类型选择¶
- UUID:分布式系统主键
- JSONB:半结构化数据
- 数组:存储同类型值的集合
性能考虑¶
1. 列顺序优化¶
2. TOAST 机制¶
- PostgreSQL 自动压缩大字段
TEXT、JSONB等类型使用 TOAST 存储- 超过 2KB 的数据会被压缩和存储到 TOAST 表
3. 填充因子¶
-- 设置表的填充因子
CREATE TABLE frequently_updated (
id SERIAL PRIMARY KEY,
data TEXT
) WITH (fillfactor = 70); -- 为更新预留空间
-- 修改现有表
ALTER TABLE users SET (fillfactor = 80);
总结¶
本章详细介绍了 PostgreSQL 的数据类型和表设计最佳实践: - 各种数据类型的特性和使用场景 - 数据库规范化设计原则 - 主键、外键和约束的设计 - 表分区和继承 - 数据类型选择建议 - 性能优化考虑
正确的数据类型选择和表设计是数据库性能和数据完整性的基础。下一章我们将学习索引和性能优化。