====== 第四章 数据结构 ======
===== 本章概要 =====
Python提供了多种内置数据结构,包括列表、元组、字典、集合和字符串。本章将深入学习这些数据结构的特点、操作方法和最佳实践。
===== 4.1 列表(List) =====
列表是Python中最常用的数据结构,是一个**有序、可变**的元素集合。
==== 4.1.1 创建列表 ====
# Create lists with literal syntax
fruits = ["apple", "banana", "cherry"]
numbers = [1, 2, 3, 4, 5]
# A single list may mix element types, including a nested list
mixed = [1, "hello", 3.14, True, [1, 2, 3]]
# Create lists with the list() constructor
empty = list()
from_string = list("Python") # ['P', 'y', 't', 'h', 'o', 'n']
from_range = list(range(5)) # [0, 1, 2, 3, 4]
# List comprehensions
squares = [x**2 for x in range(10)]
evens = [x for x in range(20) if x % 2 == 0]
==== 4.1.2 访问元素 ====
fruits = ["apple", "banana", "cherry", "date", "elderberry"]
# Index access (negative indexes count from the end)
print(fruits[0]) # apple
print(fruits[-1]) # elderberry
# Slicing: start is inclusive, stop is exclusive
print(fruits[1:3]) # ['banana', 'cherry']
print(fruits[:3]) # ['apple', 'banana', 'cherry']
print(fruits[2:]) # ['cherry', 'date', 'elderberry']
print(fruits[::2]) # ['apple', 'cherry', 'elderberry']
print(fruits[::-1]) # reversed copy of the list
# Membership tests
print("apple" in fruits) # True
print("grape" not in fruits) # True
==== 4.1.3 修改列表 ====
fruits = ["apple", "banana", "cherry"]
# Replace an element
fruits[0] = "apricot"
# Add elements
fruits.append("date") # append at the end
fruits.insert(1, "blueberry") # insert at the given position
fruits.extend(["fig", "grape"]) # append several elements at once
# Remove elements
fruits.remove("banana") # remove the first matching item
del fruits[0] # delete by index
popped = fruits.pop() # remove and return the last element
popped = fruits.pop(0) # remove and return the element at the given index
fruits.clear() # empty the list
print(fruits) # []
==== 4.1.4 列表方法 ====
numbers = [3, 1, 4, 1, 5, 9, 2, 6]
# Sorting
numbers.sort() # in-place, ascending
numbers.sort(reverse=True) # in-place, descending
# Alternatively sorted() returns a new list and leaves the original untouched
sorted_numbers = sorted(numbers)
# Reverse in place (numbers is ascending again after this)
numbers.reverse()
# Searching
print(numbers.index(5)) # index of the first match -> 5
print(numbers.count(1)) # number of occurrences -> 2
# Copying (all three produce a shallow copy)
new_list = numbers.copy() # copy() method
new_list = numbers[:] # full slice
new_list = list(numbers) # constructor
==== 4.1.5 列表操作 ====
# Concatenation
list1 = [1, 2, 3]
list2 = [4, 5, 6]
combined = list1 + list2 # [1, 2, 3, 4, 5, 6]
# Repetition
repeated = [0] * 5 # [0, 0, 0, 0, 0]
# Length
print(len(combined)) # 6
# Maximum, minimum and sum
print(max([3, 1, 4, 1, 5])) # 5
print(min([3, 1, 4, 1, 5])) # 1
print(sum([1, 2, 3, 4, 5])) # 15
==== 4.1.6 列表嵌套(二维列表) ====
# Build a 2-D list (a list of row lists)
matrix = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
# Element access: the first index picks the row, the second the column
print(matrix[0][0])  # 1
print(matrix[1][2])  # 6
# Traversal: print each row on its own line
for row in matrix:
    for item in row:
        print(item, end=" ")
    print()
# Build a 2-D list with a nested list comprehension
matrix = [[i * j for j in range(1, 4)] for i in range(1, 4)]
# [[1, 2, 3], [2, 4, 6], [3, 6, 9]]
# WARNING: pitfall when creating a 2-D list
wrong = [[0] * 3] * 3  # Wrong! All three rows reference the same inner list
wrong[0][0] = 1  # so the first element of every row becomes 1
correct = [[0] * 3 for _ in range(3)]  # Correct: a fresh inner list per row
===== 4.2 元组(Tuple) =====
元组是**有序、不可变**的元素集合。
==== 4.2.1 创建元组 ====
# Create tuples with literal syntax
point = (3, 4)
single = (1,) # a one-element tuple needs the trailing comma
empty = ()
# The parentheses may be omitted
coordinates = 3, 4, 5
# Create tuples with the tuple() constructor
from_list = tuple([1, 2, 3])
from_string = tuple("Python")
==== 4.2.2 访问元组 ====
point = (10, 20, 30, 40, 50)
# Indexing and slicing work the same way as for lists
print(point[0]) # 10
print(point[-1]) # 50
print(point[1:4]) # (20, 30, 40)
# Unpacking
x, y = (3, 4)
# A starred target collects the remaining items into a list
x, y, *rest = (1, 2, 3, 4, 5) # x=1, y=2, rest=[3,4,5]
first, *middle, last = (1, 2, 3, 4, 5) # first=1, middle=[2,3,4], last=5
==== 4.2.3 元组的不可变性 ====
point = (3, 4)
# point[0] = 5 # TypeError: 'tuple' object does not support item assignment
# Mutable elements stored inside a tuple can still be modified
mutable_tuple = ([1, 2], [3, 4])
mutable_tuple[0].append(3) # allowed: the list is mutated, the tuple is not
print(mutable_tuple) # ([1, 2, 3], [3, 4])
# "Modifying" a tuple really means creating a new one
point = (3, 4)
point = (5, 4) # rebinds the name to a new tuple
==== 4.2.4 元组的使用场景 ====
# 1. Returning several values from a function
def get_min_max(numbers):
    """Return ``(smallest, largest)`` of ``numbers`` as a tuple.

    ``numbers`` must be a non-empty sequence; ``min``/``max`` raise
    ``ValueError`` for an empty one.
    """
    return min(numbers), max(numbers)
# The returned tuple is unpacked into two names
minimum, maximum = get_min_max([3, 1, 4, 1, 5])
# 2. Dictionary keys (lists cannot be used as keys)
locations = {
(0, 0): "原点",
(1, 0): "东",
(0, 1): "北"
}
# 3. Data records
person = ("Alice", 25, "Engineer")
name, age, job = person
# 4. Protecting data from modification
DAYS = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
===== 4.3 字典(Dictionary) =====
字典是**键值对**的集合,是可变的映射类型。自Python 3.7起,字典保证保持键的插入顺序(在更早的版本中字典是无序的)。
==== 4.3.1 创建字典 ====
# Create a dict with literal syntax
person = {"name": "Alice", "age": 25, "city": "北京"}
# Create a dict with the dict() constructor
person = dict(name="Alice", age=25, city="北京") # from keyword arguments
person = dict([("name", "Alice"), ("age", 25)]) # from (key, value) pairs
# Create a dict from a list of keys, all sharing one initial value
keys = ["a", "b", "c"]
d = dict.fromkeys(keys, 0) # {'a': 0, 'b': 0, 'c': 0}
# Dict comprehension
squares = {x: x**2 for x in range(6)}
# {0: 0, 1: 1, 2: 4, 3: 9, 4: 16, 5: 25}
==== 4.3.2 访问和修改 ====
person = {"name": "Alice", "age": 25}
# Access
print(person["name"]) # Alice
print(person.get("age")) # 25
print(person.get("email", "未知")) # 未知 (the default is returned when the key is missing)
# Modify and add
person["age"] = 26 # update an existing key
person["email"] = "alice@example.com" # add a new key
# Bulk update
person.update({"phone": "123456", "city": "上海"})
# Delete
del person["email"]
# pop() removes and returns the value; "email" is already gone here, so the default is returned
email = person.pop("email", "不存在")
last = person.popitem() # remove and return the most recently inserted pair (Python 3.7+)
person.clear() # remove all entries
==== 4.3.3 字典方法 ====
person = {"name": "Alice", "age": 25, "city": "北京"}
# View objects
print(person.keys())  # dict_keys(['name', 'age', 'city'])
print(person.values())  # dict_values(['Alice', 25, '北京'])
print(person.items())  # dict_items([('name', 'Alice'), ...])
# Iteration: a dict iterates over its keys
for key in person:
    print(f"{key}: {person[key]}")
# Iterating over items() yields (key, value) pairs directly
for key, value in person.items():
    print(f"{key}: {value}")
# Key membership test
print("name" in person)  # True
# setdefault() inserts the value only when the key is missing
person.setdefault("email", "未设置")
==== 4.3.4 字典推导式 ====
# Basic comprehension
squares = {x: x**2 for x in range(10)}
# Comprehension with a condition
even_squares = {x: x**2 for x in range(10) if x % 2 == 0}
# Swap keys and values (duplicate values would collapse into a single key)
original = {"a": 1, "b": 2, "c": 3}
swapped = {v: k for k, v in original.items()}
# Filter a dict
scores = {"Alice": 85, "Bob": 92, "Charlie": 78}
passed = {k: v for k, v in scores.items() if v >= 80}
==== 4.3.5 嵌套字典 ====
# Nested dict: each username maps to a dict of profile fields
users = {
    "alice": {
        "name": "Alice Smith",
        "age": 25,
        "email": "alice@example.com",
    },
    "bob": {
        "name": "Bob Jones",
        "age": 30,
        "email": "bob@example.com",
    },
}
# Access nested data by chaining keys
print(users["alice"]["email"])
# Traverse the nested dict: outer loop over users, inner loop over fields
for username, info in users.items():
    print(f"用户名: {username}")
    for key, value in info.items():
        print(f" {key}: {value}")
===== 4.4 集合(Set) =====
集合是**无序、不重复**的元素集合。
==== 4.4.1 创建集合 ====
# Create a set with literal syntax
fruits = {"apple", "banana", "cherry"}
# Create a set with the set() constructor (duplicates are dropped)
from_list = set([1, 2, 2, 3, 3, 3]) # {1, 2, 3}
from_string = set("hello") # {'h', 'e', 'l', 'o'}
# Empty set (note: {} is an empty dict, not an empty set)
empty_set = set()
# Set comprehension
squares = {x**2 for x in range(10)}
==== 4.4.2 集合操作 ====
fruits = {"apple", "banana", "cherry"}
# Add elements
fruits.add("date")
fruits.update(["fig", "grape"]) # add several elements at once
# Remove elements
fruits.remove("banana") # raises KeyError if the element is missing
fruits.discard("banana") # no error if the element is missing
popped = fruits.pop() # remove and return an arbitrary element (not a random one)
fruits.clear() # empty the set
==== 4.4.3 集合运算 ====
a = {1, 2, 3, 4, 5}
b = {4, 5, 6, 7, 8}
# Union
print(a | b) # {1, 2, 3, 4, 5, 6, 7, 8}
print(a.union(b))
# Intersection
print(a & b) # {4, 5}
print(a.intersection(b))
# Difference
print(a - b) # {1, 2, 3}
print(a.difference(b))
# Symmetric difference (elements in exactly one of the two sets)
print(a ^ b) # {1, 2, 3, 6, 7, 8}
print(a.symmetric_difference(b))
# Subset, superset and disjointness tests
print(a.issubset(b)) # False
print(a.issuperset({1, 2})) # True
print(a.isdisjoint({6, 7})) # True (no common elements)
==== 4.4.4 不可变集合(frozenset) ====
# A frozenset is immutable, so it can be used as a dict key or as a set element
fs = frozenset([1, 2, 3])
# fs.add(4) # AttributeError
# Use case: frozensets as dict keys
data = {
frozenset([1, 2]): "value1",
frozenset([3, 4]): "value2"
}
===== 4.5 字符串(String)深入 =====
==== 4.5.1 字符串的不可变性 ====
s = "hello"
# s[0] = "H" # TypeError
# "Modifying" a string really means building a new one
s = "H" + s[1:] # "Hello"
==== 4.5.2 常用字符串方法 ====
text = " Hello, World! "
# Case conversion
print(text.upper()) # all upper case
print(text.lower()) # all lower case
print(text.title()) # first letter of every word upper case, the rest lower case
print(text.capitalize()) # first character upper case, the rest lower case (the first character here is a space)
print(text.swapcase()) # swap upper and lower case
# Stripping whitespace
print(text.strip()) # strip both ends
print(text.lstrip()) # strip the left end
print(text.rstrip()) # strip the right end
print(text.strip(" !")) # strip the given characters from both ends
# Searching and replacing
print(text.find("World")) # index of the substring, or -1 if absent
print(text.index("World")) # index of the substring, raises ValueError if absent
print(text.replace("World", "Python"))
print(text.count("l")) # number of occurrences
# Splitting and joining
words = "apple,banana,cherry".split(",")
csv = ",".join(words)
lines = "line1\nline2\nline3".splitlines()
==== 4.5.3 字符串判断 ====
# Character-class predicates
print("123".isdigit()) # True
print("abc".isalpha()) # True
print("abc123".isalnum()) # True
print(" ".isspace()) # True
# Case predicates
print("Title".istitle()) # True
print("UPPER".isupper()) # True
print("lower".islower()) # True
# Prefix and suffix tests
print("12345".startswith("12")) # True
print("hello.txt".endswith(".txt")) # True
==== 4.5.4 字符串格式化 ====
name = "Alice"
age = 25
# f-strings (recommended)
print(f"My name is {name}, I'm {age} years old.")
print(f"Pi = {3.14159:.2f}") # Pi = 3.14
print(f"{name:^20}") # centered in a field of width 20
print(f"{name:>20}") # right-aligned in a field of width 20
print(f"{age:0>5}") # padded with zeros: 00025
# str.format() method
print("My name is {}, I'm {} years old.".format(name, age)) # automatic numbering
print("My name is {0}, I'm {1} years old.".format(name, age)) # positional indexes
print("My name is {n}, I'm {a} years old.".format(n=name, a=age)) # keyword names
# %-formatting (old style)
print("My name is %s, I'm %d years old." % (name, age))
===== 4.6 数据结构比较 =====
^ 特性 ^ 列表 ^ 元组 ^ 字典 ^ 集合 ^
| 语法 | [1, 2] | (1, 2) | {1: 'a'} | {1, 2} |
| 有序 | ✓ | ✓ | ✓(3.7+) | ✗ |
| 可变 | ✓ | ✗ | ✓ | ✓ |
| 可哈希 | ✗ | ✓(元素可哈希) | ✗ | ✗ |
| 重复元素 | ✓ | ✓ | 键不可重复 | ✗ |
| 索引访问 | ✓ | ✓ | 按键访问 | ✗ |
| 适用场景 | 序列数据 | 固定数据、字典键 | 键值映射 | 去重、集合运算 |
===== 4.7 代码示例 =====
==== 示例1:词频统计 ====
from collections import Counter
def word_frequency(text):
    """Count how often each word occurs in ``text``.

    The text is lower-cased and split on whitespace. Every non-alphanumeric
    character is then removed from each token, and tokens that end up empty
    are discarded.

    Returns:
        A ``collections.Counter`` mapping each cleaned word to its count.
    """
    # Strip punctuation token by token; a token made only of punctuation
    # becomes the empty string and is filtered out below.
    cleaned = (
        "".join(ch for ch in token if ch.isalnum())
        for token in text.lower().split()
    )
    return Counter(word for word in cleaned if word)
# Sample input for the word-frequency example
text = """
Python is great and Python is easy.
Learning Python is fun!
"""
freq = word_frequency(text)
print("词频统计:")
# most_common() yields (word, count) pairs, most frequent first
for word, count in freq.most_common():
    print(f" {word}: {count}")
==== 示例2:学生成绩管理系统 ====
class StudentManager:
    """In-memory registry of students and their per-subject scores.

    ``self.students`` maps a student id to a record of the form
    ``{"name": <name>, "scores": {<subject>: <score>}}``.
    """

    def __init__(self):
        """Start with an empty registry."""
        self.students = {}

    def add_student(self, student_id, name):
        """Register a student with no scores.

        An existing record with the same ``student_id`` is replaced.
        """
        self.students[student_id] = {
            "name": name,
            "scores": {},
        }

    def add_score(self, student_id, subject, score):
        """Store ``score`` for ``subject``; unknown student ids are ignored."""
        if student_id in self.students:
            self.students[student_id]["scores"][subject] = score

    def get_average(self, student_id):
        """Return the student's mean score.

        Returns ``None`` for an unknown ``student_id`` and ``0`` for a
        student who has no scores yet.
        """
        if student_id not in self.students:
            return None
        scores = self.students[student_id]["scores"].values()
        # An empty values view is falsy, which avoids dividing by zero.
        return sum(scores) / len(scores) if scores else 0

    def get_top_students(self, n=3):
        """Return up to ``n`` ``(name, average)`` pairs, highest average first."""
        averages = []
        for sid, info in self.students.items():
            avg = self.get_average(sid)
            if avg is not None:
                averages.append((info["name"], avg))
        return sorted(averages, key=lambda x: x[1], reverse=True)[:n]
# Usage: register two students, record their scores, then print the ranking
manager = StudentManager()
manager.add_student("001", "Alice")
manager.add_student("002", "Bob")
manager.add_score("001", "Math", 90)
manager.add_score("001", "English", 85)
manager.add_score("002", "Math", 95)
manager.add_score("002", "English", 88)
print("前三名:", manager.get_top_students())
==== 示例3:使用Counter进行数据分析 ====
from collections import Counter
import random
# Simulate 1000 rolls of a six-sided die
rolls = [random.randint(1, 6) for _ in range(1000)]
count = Counter(rolls)
print("掷骰子统计:")
# Print one histogram row per face, in face order
for number in range(1, 7):
    freq = count[number]  # a Counter returns 0 for a face that never came up
    bar = "█" * (freq // 5)  # one bar character per 5 occurrences
    print(f"{number}: {bar} {freq}")
===== 4.8 练习题 =====
==== 练习1:列表去重保持顺序 ====
def unique_ordered(seq):
    """Return the items of ``seq`` as a list, without duplicates.

    The first occurrence of each item is kept and the original order is
    preserved. Items must be hashable because they are tracked in a set.
    """
    seen = set()
    result = []
    for item in seq:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result
# Alternative using dict.fromkeys() (Python 3.7+)
def unique_ordered_v2(seq):
    """Return the items of ``seq`` as a list, without duplicates, in first-seen order.

    Relies on dicts preserving insertion order; items must be hashable.
    """
    return list(dict.fromkeys(seq))
# Test
print(unique_ordered([3, 1, 2, 1, 3, 2, 4])) # [3, 1, 2, 4]
==== 练习2:两个字典合并 ====
def merge_dicts(d1, d2):
    """Return a new dict with the entries of ``d1`` and ``d2``.

    For keys present in both, the value from ``d2`` wins. Neither input
    is modified.
    """
    result = d1.copy()  # shallow copy so that d1 is left untouched
    result.update(d2)
    return result
# Python 3.9+: use the | operator
def merge_dicts_v2(d1, d2):
    """Return a new dict merging ``d1`` and ``d2``; values from ``d2`` win on shared keys."""
    return d1 | d2
# Test
d1 = {"a": 1, "b": 2}
d2 = {"b": 3, "c": 4}
print(merge_dicts(d1, d2)) # {'a': 1, 'b': 3, 'c': 4}
==== 练习3:找出两个列表的共同元素 ====
def common_elements(list1, list2):
    """Return the distinct elements that occur in both lists.

    The result comes from a set intersection, so duplicates are removed
    and the order of the returned list is not guaranteed. Elements must
    be hashable.
    """
    return list(set(list1) & set(list2))
# Order-preserving version
def common_elements_ordered(list1, list2):
    """Return the elements of ``list1`` that also occur in ``list2``.

    The order of ``list1`` is kept, and so are its duplicates. Elements of
    ``list2`` must be hashable.
    """
    set2 = set(list2)  # build the lookup set once for O(1) membership tests
    return [x for x in list1 if x in set2]
# Test
l1 = [1, 2, 3, 4, 5]
l2 = [4, 5, 6, 7, 8]
print(common_elements(l1, l2)) # [4, 5] (element order of a set-based result is not guaranteed)
==== 练习4:统计字符类型 ====
def analyze_text(text):
    """Classify every character of ``text`` and count each class.

    Returns:
        A dict with the keys ``"letters"`` (``str.isalpha``), ``"digits"``
        (``str.isdigit``), ``"spaces"`` (``str.isspace``) and ``"others"``
        (every remaining character). The four counts add up to
        ``len(text)``.
    """
    letters = sum(c.isalpha() for c in text)
    digits = sum(c.isdigit() for c in text)
    spaces = sum(c.isspace() for c in text)
    return {
        "letters": letters,
        "digits": digits,
        "spaces": spaces,
        # Computed as the remainder rather than with ``not c.isalnum()``:
        # a character such as "½" is alphanumeric without being a letter
        # or a digit, and would otherwise not be counted in any class.
        "others": len(text) - letters - digits - spaces,
    }
text = "Hello World! 123"
result = analyze_text(text)
# Print one "class: count" line per character class
for key, value in result.items():
    print(f"{key}: {value}")
===== 本章小结 =====
本章深入学习了Python的核心数据结构:
* **列表** - 有序可变序列,支持增删改查、排序、切片等操作
* **元组** - 有序不可变序列,用于保护数据、作为字典键
* **字典** - 键值对映射,快速查找、嵌套结构
* **集合** - 无序不重复元素,支持数学集合运算
* **字符串** - 不可变字符序列,丰富的方法支持
掌握这些数据结构是编写高效Python代码的基础。下一章将学习面向对象编程。
===== 进一步阅读 =====
* [[https://docs.python.org/zh-cn/3/tutorial/datastructures.html|Python官方教程 - 数据结构]]
* [[https://docs.python.org/zh-cn/3/library/collections.html|collections模块]]