Python 编程教程 / 07 - 数据结构
第 07 章:数据结构
掌握 Python 内置数据结构及其高级用法,选择合适的数据结构解决实际问题。
7.1 列表(list)
7.1.1 基本操作
# List basics: create, index/slice, mutate, and delete.
# Create
fruits = ["apple", "banana", "cherry"]
numbers = list(range(5)) # [0, 1, 2, 3, 4]
empty = []
# Access
print(fruits[0]) # apple
print(fruits[-1]) # cherry (negative indexes count from the end)
print(fruits[1:3]) # ['banana', 'cherry'] (the stop index is exclusive)
# Modify
fruits[0] = "avocado"
fruits.append("date")
fruits.insert(1, "blueberry")
fruits.extend(["elderberry", "fig"])
fruits += ["grape"]
# Delete
fruits.remove("banana") # remove by value
popped = fruits.pop() # pop the last element ('grape')
popped = fruits.pop(0) # pop the element at the given index ('avocado')
del fruits[1:3] # delete a slice
fruits.clear() # remove every element
7.1.2 常用方法
# Common list methods: search, sort, reverse, length.
numbers = [3, 1, 4, 1, 5, 9, 2, 6, 5]
# Search
print(numbers.index(5)) # 4 (index of the first occurrence)
print(numbers.count(1)) # 2
print(5 in numbers) # True
# Sort
numbers.sort() # in place, ascending
numbers.sort(reverse=True) # in place, descending
numbers.sort(key=lambda x: -x) # custom key (negating also gives descending order)
sorted_nums = sorted(numbers) # returns a new list; the original is unchanged
# Reverse
numbers.reverse() # in place
reversed_nums = numbers[::-1] # returns a new list
# Length
print(len(numbers)) # 9
7.1.3 列表解包
# Basic unpacking: one target per item
a, b, c = [1, 2, 3]
# Starred unpacking: the starred name collects the remaining items as a list
first, *middle, last = [1, 2, 3, 4, 5]
# first=1, middle=[2, 3, 4], last=5
# Unpacking a list into positional arguments with *
def add(a, b, c):
    """Return the sum of the three arguments."""
    return a + b + c


print(add(*[1, 2, 3]))  # 6
7.2 元组(tuple)
7.2.1 基本操作
# Tuple basics: create, access, immutability, unpacking.
# Create
point = (3, 4)
single = (42,) # a one-element tuple needs the trailing comma
empty = ()
from_list = tuple([1, 2, 3])
# Access
print(point[0]) # 3
print(point[1:]) # (4,)
print(len(point)) # 2
# Immutable
# point[0] = 10 # ❌ TypeError
# Unpack
x, y = point
# Named tuples are more practical (covered later in this chapter)
7.2.2 元组 vs 列表
| 特性 | 元组 | 列表 |
|---|---|---|
| 可变性 | 不可变 | 可变 |
| 语法 | (1, 2, 3) | [1, 2, 3] |
| 性能 | 更快,内存更少 | 较慢 |
| 可作为字典键 | ✅(元素均可哈希时) | ❌ |
| 可作为集合元素 | ✅(元素均可哈希时) | ❌ |
| 适用场景 | 固定结构数据 | 动态集合 |
7.3 字典(dict)
7.3.1 基本操作
# Dict basics: create, read, modify, delete.
# Create
person = {"name": "Alice", "age": 30, "city": "北京"}
from_keys = dict.fromkeys(["a", "b", "c"], 0) # {'a': 0, 'b': 0, 'c': 0}
from_pairs = dict([("a", 1), ("b", 2)])
empty = {}
# Access
print(person["name"]) # Alice
print(person.get("email", "N/A")) # N/A (safe access: returns the default instead of raising)
# Modify
person["age"] = 31
person.update({"city": "上海", "phone": "123456"})
person.setdefault("email", "alice@example.com") # sets the value only when the key is missing
# Delete
del person["phone"]
email = person.pop("email", None) # safe delete: returns the default when the key is missing
person.clear()
# Iteration
user = {"name": "Alice", "age": 30, "city": "北京"}

# Iterating a dict directly yields its keys.
for key in user:
    print(key)

# items() yields (key, value) pairs.
for key, value in user.items():
    print(f"{key}: {value}")

# values() yields only the values.
for value in user.values():
    print(value)

# Membership tests look at keys, not values.
print("name" in user)  # True
7.3.2 字典合并
# Merge operators (Python 3.9+)
defaults = {"color": "red", "size": 10, "count": 1}
overrides = {"color": "blue", "count": 5}
# | operator: builds a new dict; on duplicate keys the right-hand value wins
result = defaults | overrides
# {'color': 'blue', 'size': 10, 'count': 5}
# |= operator: updates the left-hand dict in place
defaults |= overrides
# Dict unpacking: builds a new dict and also works before Python 3.9 (3.5+).
# Note: defaults was already updated in place above, so merged equals defaults here.
merged = {**defaults, **overrides}
7.3.3 字典推导
# Basic comprehension
squares = {x: x**2 for x in range(6)}
# {0: 0, 1: 1, 2: 4, 3: 9, 4: 16, 5: 25}
# Filtering
prices = {"apple": 5.0, "banana": 3.0, "cherry": 8.0}
expensive = {k: v for k, v in prices.items() if v > 4}
# {'apple': 5.0, 'cherry': 8.0}
# Swapping keys and values (duplicate values would collapse into a single key)
original = {"a": 1, "b": 2}
inverted = {v: k for k, v in original.items()}
# {1: 'a', 2: 'b'}
7.3.4 defaultdict 和 Counter
from collections import defaultdict, Counter
# defaultdict: a missing key is initialised by calling the factory (int() -> 0)
word_count = defaultdict(int)
for word in ["apple", "banana", "apple", "cherry", "banana", "apple"]:
    word_count[word] += 1
# {'apple': 3, 'banana': 2, 'cherry': 1}
# Grouping: defaultdict(list) creates the empty list for each new grade
students = [
    {"name": "Alice", "grade": "A"},
    {"name": "Bob", "grade": "B"},
    {"name": "Charlie", "grade": "A"},
]
by_grade = defaultdict(list)
for s in students:
    by_grade[s["grade"]].append(s["name"])
# {'A': ['Alice', 'Charlie'], 'B': ['Bob']}
# Counter: counts hashable items (here, the characters of a string)
text = "hello world"
counter = Counter(text)
print(counter.most_common(3)) # [('l', 3), ('o', 2), ('h', 1)]
# Counter arithmetic
c1 = Counter(a=3, b=1)
c2 = Counter(a=1, b=2)
print(c1 + c2) # Counter({'a': 4, 'b': 3})
print(c1 - c2) # Counter({'a': 2}) (results that are zero or negative are dropped)
7.4 集合(set)
7.4.1 基本操作
# Set basics: create, add/remove, set algebra, subset tests.
# Create
fruits = {"apple", "banana", "cherry"}
from_list = set([1, 2, 2, 3, 3]) # {1, 2, 3}
empty = set() # note: {} creates an empty dict, not an empty set
# Add and remove
fruits.add("date")
fruits.discard("banana") # no error if the element is absent
fruits.remove("apple") # raises KeyError if the element is absent
# Set operations
a = {1, 2, 3, 4}
b = {3, 4, 5, 6}
print(a | b) # union: {1, 2, 3, 4, 5, 6}
print(a & b) # intersection: {3, 4}
print(a - b) # difference: {1, 2}
print(a ^ b) # symmetric difference: {1, 2, 5, 6}
# Subset and superset
print({1, 2} <= {1, 2, 3}) # True (subset)
print({1, 2, 3} >= {1, 2}) # True (superset)
7.4.2 集合推导
# Set comprehension: equal results collapse into one element
squares = {x**2 for x in range(-5, 6)}
# {0, 1, 4, 9, 16, 25}
# Deduplicate
words = ["hello", "world", "hello", "python"]
unique = {w.lower() for w in words}
# {'hello', 'world', 'python'} (sets are unordered, so display order may differ)
7.5 NamedTuple
from collections import namedtuple
# Create the type
Point = namedtuple("Point", ["x", "y"])
# Use
p = Point(3, 4)
print(p.x, p.y) # 3 4
print(p[0], p[1]) # 3 4 (indexing also works)
# Unpack
x, y = p
# Immutable
# p.x = 10 # ❌ AttributeError
# With defaults
Point3D = namedtuple("Point3D", "x y z", defaults=[0, 0, 0])
p = Point3D(1, 2)
print(p) # Point3D(x=1, y=2, z=0)
typing.NamedTuple(推荐)
from typing import NamedTuple
class Point(NamedTuple):
    """Immutable 2-D point with typed fields and an optional label."""

    x: float
    y: float
    label: str = ""  # fields with defaults must come after required fields


p = Point(3.0, 4.0, "origin")
print(p.x, p.y, p.label)
7.6 数据类(dataclass)
7.6.1 基本用法
from dataclasses import dataclass, field
@dataclass
class User:
    """User record with an optional e-mail address and a list of tags."""

    name: str
    age: int
    email: str = ""
    # default_factory gives every instance its own list instead of a shared one
    tags: list[str] = field(default_factory=list)


# @dataclass generates __init__, __repr__ and __eq__
user = User("Alice", 30, "alice@example.com")
print(user)  # User(name='Alice', age=30, email='alice@example.com', tags=[])

# Comparison
user2 = User("Alice", 30, "alice@example.com")
print(user == user2)  # True (compared by field values, not by identity)
7.6.2 dataclass 高级特性
@dataclass(frozen=True)  # frozen: instances are immutable
class Config:
    """Connection settings whose fields cannot be reassigned after creation."""

    host: str = "localhost"
    port: int = 8080


config = Config()
# config.port = 9090  # ❌ FrozenInstanceError
@dataclass(order=True)  # order=True also generates <, <=, > and >=
class Student:
    """Student record that can be ordered.

    Instances compare like tuples of their fields in declaration order,
    so ``name`` is compared first and ``score`` only breaks ties.
    """

    name: str
    score: float


students = [Student("Bob", 85), Student("Alice", 92)]
# Descending by (name, score): Bob comes before Alice.
# To rank by score instead, use sorted(students, key=lambda s: s.score, reverse=True).
sorted_students = sorted(students, reverse=True)
@dataclass
class Product:
    """Product record demonstrating per-field options of ``field()``."""

    name: str
    # compare=False: the field is left out of the generated comparison methods,
    # including ==
    price: float = field(compare=False)
    # repr=False: hidden from repr(); hash=False: excluded from a generated
    # __hash__ (this class is eq=True and not frozen, so its __hash__ is None)
    sku: str = field(repr=False, hash=False)
7.6.3 继承
@dataclass
class Animal:
    """Base record holding an animal's name and the sound it makes."""

    name: str
    sound: str
@dataclass
class Dog(Animal):
    """Animal with a breed; inherited fields come first in ``__init__``."""

    # Fields added by a subclass follow the base-class fields in __init__ and repr
    breed: str = "Unknown"


dog = Dog("Rex", "Woof", "German Shepherd")
print(dog)  # Dog(name='Rex', sound='Woof', breed='German Shepherd')
7.7 deque(双端队列)
from collections import deque
# Create
dq = deque([1, 2, 3])
# Operations at both ends
dq.append(4) # add on the right
dq.appendleft(0) # add on the left
dq.pop() # pop from the right
dq.popleft() # pop from the left
# Rotate
dq = deque([1, 2, 3, 4, 5])
dq.rotate(2) # rotate right: deque([4, 5, 1, 2, 3])
dq.rotate(-2) # rotate left: deque([1, 2, 3, 4, 5])
# Bounded length: once full, appending on the right drops the leftmost item
recent = deque(maxlen=3)
for i in range(5):
    recent.append(i)
    print(list(recent))
# [0]
# [0, 1]
# [0, 1, 2]
# [1, 2, 3]
# [2, 3, 4]
7.8 数据结构选型
| 需求 | 推荐数据结构 | 理由 |
|---|---|---|
| 有序集合,频繁增删 | list | 灵活的增删操作 |
| 不可变序列 | tuple | 更快,可哈希 |
| 键值映射 | dict | O(1) 查找 |
| 默认值字典 | defaultdict | 避免 KeyError |
| 计数 | Counter | 内置计数功能 |
| 去重 | set | O(1) 成员检查 |
| 两端操作 | deque | O(1) 两端操作 |
| 不可变命名字段 | NamedTuple | 类型安全,内存少 |
| 可变命名字段 | dataclass | 自动生成方法 |
7.9 注意事项
🔴 注意:
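- `{}` 创建空字典,`set()` 创建空集合
- 字典键必须是可哈希(hashable)的类型
- `dataclass` 默认(`eq=True` 且未设置 `frozen=True`)会将 `__hash__` 设为 `None`,实例不可哈希;需要可哈希实例时使用 `frozen=True`
- `defaultdict` 在访问不存在的键时才调用 `default_factory`,每次调用都会创建新的默认值对象并存入字典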
💡 提示:
- 需要频繁查找时,使用 `set` 或 `dict`(平均 O(1))而非 `list`(O(n))
- 需要维护插入顺序时,Python 3.7+ 的 `dict` 已经保证有序
- 处理大量数据时考虑 `array` 模块或 NumPy 数组
- 需要可变的命名字段时,`dataclass`(Python 3.7+)是替代 NamedTuple 的首选
📌 业务场景:
from collections import Counter, defaultdict
from dataclasses import dataclass, field
@dataclass
class SalesReport:
    """Sales summary for a single region."""

    region: str
    total: float = 0.0  # sum of the sale amounts
    orders: int = 0  # number of sale records
    # (product, count) pairs; default_factory gives each report its own list
    top_products: list[tuple[str, int]] = field(default_factory=list)
def analyze_sales(sales_data: list[dict]) -> list[SalesReport]:
    """Summarise sales per region.

    Args:
        sales_data: Sale records; each must provide the keys ``"region"``,
            ``"product"`` and ``"amount"``.

    Returns:
        One ``SalesReport`` per region, ordered by ``total`` from highest
        to lowest. An empty input yields an empty list.

    Raises:
        KeyError: If a record lacks one of the required keys.
    """
    # Group the records by region
    by_region = defaultdict(list)
    for sale in sales_data:
        by_region[sale["region"]].append(sale)

    reports = []
    for region, sales in by_region.items():
        # Count how often each product appears to find the best sellers
        product_counter = Counter(s["product"] for s in sales)
        top_products = product_counter.most_common(3)
        report = SalesReport(
            region=region,
            total=sum(s["amount"] for s in sales),
            orders=len(sales),
            top_products=top_products,
        )
        reports.append(report)
    return sorted(reports, key=lambda r: r.total, reverse=True)
# Sample data
sales = [
    {"region": "华东", "product": "手机", "amount": 5999},
    {"region": "华东", "product": "手机", "amount": 5999},
    {"region": "华北", "product": "电脑", "amount": 8999},
]
# Reports arrive sorted by total, highest first
for report in analyze_sales(sales):
    print(f"{report.region}: ¥{report.total:.0f} ({report.orders}单)")
    print(f" 热销: {report.top_products}")
7.10 扩展阅读
- Data Structures - Python 文档
- collections 模块
- dataclasses 模块
- typing.NamedTuple
- 《流畅的 Python》第 1、3 章