第四章 数据结构

本章概要

Python提供了多种内置数据结构，包括列表、元组、字典、集合和字符串。本章将深入学习这些数据结构的特点、操作方法和最佳实践。

4.1 列表（List）

列表是Python中最常用的数据结构，是一个有序、可变的元素集合。

4.1.1 创建列表

# 直接创建
fruits = ["apple", "banana", "cherry"]
numbers = [1, 2, 3, 4, 5]
mixed = [1, "hello", 3.14, True, [1, 2, 3]]
 
# 使用list()函数
empty = list()
from_string = list("Python")  # ['P', 'y', 't', 'h', 'o', 'n']
from_range = list(range(5))   # [0, 1, 2, 3, 4]
 
# 列表推导式
squares = [x**2 for x in range(10)]
evens = [x for x in range(20) if x % 2 == 0]

4.1.2 访问元素

fruits = ["apple", "banana", "cherry", "date", "elderberry"]
 
# 索引访问
print(fruits[0])    # apple
print(fruits[-1])   # elderberry
 
# 切片
print(fruits[1:3])   # ['banana', 'cherry']
print(fruits[:3])    # ['apple', 'banana', 'cherry']
print(fruits[2:])    # ['cherry', 'date', 'elderberry']
print(fruits[::2])   # ['apple', 'cherry', 'elderberry']
print(fruits[::-1])  # 反转列表
 
# 检查成员
print("apple" in fruits)   # True
print("grape" not in fruits)  # True

4.1.3 修改列表

fruits = ["apple", "banana", "cherry"]
 
# 修改元素
fruits[0] = "apricot"
 
# 添加元素
fruits.append("date")           # 末尾添加
fruits.insert(1, "blueberry")   # 指定位置插入
fruits.extend(["fig", "grape"]) # 批量添加
 
# 删除元素
fruits.remove("banana")  # 删除第一个匹配项
del fruits[0]           # 删除指定索引
popped = fruits.pop()   # 删除并返回最后一个
popped = fruits.pop(0)  # 删除并返回指定索引
fruits.clear()          # 清空列表
 
print(fruits)

4.1.4 列表方法

numbers = [3, 1, 4, 1, 5, 9, 2, 6]
 
# 排序
numbers.sort()           # 原地排序
numbers.sort(reverse=True)  # 降序
 
# 或使用sorted()（返回新列表）
sorted_numbers = sorted(numbers)
 
# 反转
numbers.reverse()
 
# 查找
print(numbers.index(5))  # 返回第一个匹配的索引
print(numbers.count(1))  # 统计出现次数
 
# 复制
new_list = numbers.copy()  # 浅拷贝
new_list = numbers[:]      # 切片复制
new_list = list(numbers)   # 构造函数复制

4.1.5 列表操作

# 拼接
list1 = [1, 2, 3]
list2 = [4, 5, 6]
combined = list1 + list2  # [1, 2, 3, 4, 5, 6]
 
# 重复
repeated = [0] * 5  # [0, 0, 0, 0, 0]
 
# 长度
print(len(combined))
 
# 最值
print(max([3, 1, 4, 1, 5]))
print(min([3, 1, 4, 1, 5]))
print(sum([1, 2, 3, 4, 5]))

4.1.6 列表嵌套（二维列表）

# 创建二维列表
matrix = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
]
 
# 访问元素
print(matrix[0][0])  # 1
print(matrix[1][2])  # 6
 
# 遍历
for row in matrix:
    for item in row:
        print(item, end=" ")
    print()
 
# 列表推导式创建二维列表
matrix = [[i * j for j in range(1, 4)] for i in range(1, 4)]
# [[1, 2, 3], [2, 4, 6], [3, 6, 9]]
 
# ⚠️ 创建二维列表的陷阱
wrong = [[0] * 3] * 3  # 错误！三行引用同一列表
wrong[0][0] = 1        # 所有行的第一个元素都变成1
 
correct = [[0] * 3 for _ in range(3)]  # 正确

4.2 元组（Tuple）

元组是有序、不可变的元素集合。

4.2.1 创建元组

# 直接创建
point = (3, 4)
single = (1,)  # 单元素元组需要逗号
empty = ()
 
# 省略括号
coordinates = 3, 4, 5
 
# 使用tuple()函数
from_list = tuple([1, 2, 3])
from_string = tuple("Python")

4.2.2 访问元组

point = (10, 20, 30, 40, 50)
 
# 索引和切片与列表相同
print(point[0])      # 10
print(point[-1])     # 50
print(point[1:4])    # (20, 30, 40)
 
# 解包
x, y = (3, 4)
x, y, *rest = (1, 2, 3, 4, 5)  # x=1, y=2, rest=[3,4,5]
first, *middle, last = (1, 2, 3, 4, 5)

4.2.3 元组的不可变性

point = (3, 4)
# point[0] = 5  # TypeError: 'tuple' object does not support item assignment
 
# 但可以修改可变元素
mutable_tuple = ([1, 2], [3, 4])
mutable_tuple[0].append(3)  # 可以！
print(mutable_tuple)  # ([1, 2, 3], [3, 4])
 
# "修改"元组：创建新元组
point = (3, 4)
point = (5, 4)  # 重新赋值

4.2.4 元组的使用场景

# 1. 函数返回多个值
def get_min_max(numbers):
    """Return the smallest and largest item of ``numbers`` as a tuple.

    Args:
        numbers: A non-empty collection of mutually comparable items.

    Returns:
        A ``(minimum, maximum)`` tuple.
    """
    lowest = min(numbers)
    highest = max(numbers)
    return lowest, highest
 
minimum, maximum = get_min_max([3, 1, 4, 1, 5])
 
# 2. 字典的键（列表不行）
locations = {
    (0, 0): "原点",
    (1, 0): "东",
    (0, 1): "北"
}
 
# 3. 数据记录
person = ("Alice", 25, "Engineer")
name, age, job = person
 
# 4. 保护数据不被修改
DAYS = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")

4.3 字典（Dictionary）

字典是键值对的集合，是可变的映射类型。在 Python 3.7 之前，字典不保证元素顺序；自 Python 3.7 起，字典会保持键的插入顺序。

4.3.1 创建字典

# 直接创建
person = {"name": "Alice", "age": 25, "city": "北京"}
 
# 使用dict()函数
person = dict(name="Alice", age=25, city="北京")
person = dict([("name", "Alice"), ("age", 25)])
 
# 从键创建
keys = ["a", "b", "c"]
d = dict.fromkeys(keys, 0)  # {'a': 0, 'b': 0, 'c': 0}
 
# 字典推导式
squares = {x: x**2 for x in range(6)}
# {0: 0, 1: 1, 2: 4, 3: 9, 4: 16, 5: 25}

4.3.2 访问和修改

person = {"name": "Alice", "age": 25}
 
# 访问
print(person["name"])       # Alice
print(person.get("age"))    # 25
print(person.get("email", "未知"))  # 未知（不存在时返回默认值）
 
# 修改和添加
person["age"] = 26          # 修改
person["email"] = "alice@example.com"  # 添加
 
# 批量更新
person.update({"phone": "123456", "city": "上海"})
 
# 删除
del person["email"]
email = person.pop("email", "不存在")  # 删除并返回值
last = person.popitem()  # 删除并返回最后插入的键值对（Python 3.7+）
person.clear()  # 清空

4.3.3 字典方法

person = {"name": "Alice", "age": 25, "city": "北京"}
 
# 获取视图
print(person.keys())    # dict_keys(['name', 'age', 'city'])
print(person.values())  # dict_values(['Alice', 25, '北京'])
print(person.items())   # dict_items([('name', 'Alice'), ...])
 
# 遍历
for key in person:
    print(f"{key}: {person[key]}")
 
for key, value in person.items():
    print(f"{key}: {value}")
 
# 检查键
print("name" in person)  # True
 
# 设置默认值
person.setdefault("email", "未设置")

4.3.4 字典推导式

# 基本推导式
squares = {x: x**2 for x in range(10)}
 
# 带条件的推导式
even_squares = {x: x**2 for x in range(10) if x % 2 == 0}
 
# 交换键值
original = {"a": 1, "b": 2, "c": 3}
swapped = {v: k for k, v in original.items()}
 
# 过滤字典
scores = {"Alice": 85, "Bob": 92, "Charlie": 78}
passed = {k: v for k, v in scores.items() if v >= 80}

4.3.5 嵌套字典

# 嵌套字典
users = {
    "alice": {
        "name": "Alice Smith",
        "age": 25,
        "email": "alice@example.com"
    },
    "bob": {
        "name": "Bob Jones",
        "age": 30,
        "email": "bob@example.com"
    }
}
 
# 访问嵌套数据
print(users["alice"]["email"])
 
# 遍历嵌套字典
for username, info in users.items():
    print(f"用户名: {username}")
    for key, value in info.items():
        print(f"  {key}: {value}")

4.4 集合（Set）

集合是无序、不重复的元素集合。

4.4.1 创建集合

# 直接创建
fruits = {"apple", "banana", "cherry"}
 
# 使用set()函数
from_list = set([1, 2, 2, 3, 3, 3])  # {1, 2, 3}
from_string = set("hello")  # {'h', 'e', 'l', 'o'}
 
# 空集合（注意：{}是空字典）
empty_set = set()
 
# 集合推导式
squares = {x**2 for x in range(10)}

4.4.2 集合操作

fruits = {"apple", "banana", "cherry"}
 
# Add elements
fruits.add("date")
fruits.update(["fig", "grape"])  # add several elements at once
 
# Remove elements
fruits.remove("banana")       # raises KeyError if the element is missing
fruits.discard("banana")      # no error if the element is missing
popped = fruits.pop()         # removes and returns an arbitrary element (not a random one)
fruits.clear()                # remove every element

4.4.3 集合运算

a = {1, 2, 3, 4, 5}
b = {4, 5, 6, 7, 8}
 
# 并集
print(a | b)           # {1, 2, 3, 4, 5, 6, 7, 8}
print(a.union(b))
 
# 交集
print(a & b)           # {4, 5}
print(a.intersection(b))
 
# 差集
print(a - b)           # {1, 2, 3}
print(a.difference(b))
 
# 对称差集（异或）
print(a ^ b)           # {1, 2, 3, 6, 7, 8}
print(a.symmetric_difference(b))
 
# 子集和超集
print(a.issubset(b))   # False
print(a.issuperset({1, 2}))  # True
print(a.isdisjoint({6, 7}))  # True（不相交）

4.4.4 不可变集合（frozenset）

# frozenset是不可变的，可以作为字典的键或集合的元素
fs = frozenset([1, 2, 3])
# fs.add(4)  # AttributeError
 
# 使用场景
data = {
    frozenset([1, 2]): "value1",
    frozenset([3, 4]): "value2"
}

4.5 字符串（String）深入

4.5.1 字符串的不可变性

s = "hello"
# s[0] = "H"  # TypeError
 
# "修改"字符串：创建新字符串
s = "H" + s[1:]  # "Hello"

4.5.2 常用字符串方法

text = "  Hello, World!  "
 
# Case conversion
print(text.upper())       # all uppercase
print(text.lower())       # all lowercase
print(text.title())       # first letter of every word capitalized
print(text.capitalize())  # first character capitalized, the rest lowercased
print(text.swapcase())    # swap upper and lower case
 
# Stripping whitespace
print(text.strip())       # strip whitespace from both ends
print(text.lstrip())      # strip leading whitespace
print(text.rstrip())      # strip trailing whitespace
print(text.strip(" !"))   # strip any of the given characters from both ends
 
# Searching and replacing
print(text.find("World"))   # index of the substring, or -1 if absent
print(text.index("World"))  # like find(), but raises ValueError if absent
print(text.replace("World", "Python"))
print(text.count("l"))      # number of occurrences
 
# Splitting and joining
words = "apple,banana,cherry".split(",")
csv = ",".join(words)
lines = "line1\nline2\nline3".splitlines()

4.5.3 字符串判断

print("123".isdigit())      # True
print("abc".isalpha())      # True
print("abc123".isalnum())   # True
print(" ".isspace())        # True
print("Title".istitle())    # True
print("UPPER".isupper())    # True
print("lower".islower())    # True
 
print("12345".startswith("12"))  # True
print("hello.txt".endswith(".txt"))  # True

4.5.4 字符串格式化

name = "Alice"
age = 25
 
# f-string（推荐）
print(f"My name is {name}, I'm {age} years old.")
print(f"Pi = {3.14159:.2f}")  # Pi = 3.14
print(f"{name:^20}")  # 居中
print(f"{name:>20}")  # 右对齐
print(f"{age:0>5}")   # 填充 00025
 
# format()方法
print("My name is {}, I'm {} years old.".format(name, age))
print("My name is {0}, I'm {1} years old.".format(name, age))
print("My name is {n}, I'm {a} years old.".format(n=name, a=age))
 
# %格式化（旧式）
print("My name is %s, I'm %d years old." % (name, age))

4.6 数据结构比较

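| 特性 | 列表 | 元组 | 字典 | 集合 |
| --- | --- | --- | --- | --- |
| 语法 | [1, 2] | (1, 2) | {1: 'a'} | {1, 2} |
| 有序 | ✓ | ✓ | ✓(3.7+) | ✗ |
| 可变 | ✓ | ✗ | ✓ | ✓ |
| 可哈希 | ✗ | ✓(元素可哈希) | ✗ | ✗ |
| 重复元素 | ✓ | ✓ | 键不可重复 | ✗ |
| 索引访问 | ✓ | ✓ | 按键访问 | ✗ |
| 适用场景 | 序列数据 | 固定数据、字典键 | 键值映射 | 去重、集合运算 |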

4.7 代码示例

示例1：词频统计

from collections import Counter
 
def word_frequency(text):
    """Count how often each word occurs in ``text``.

    The text is lower-cased and split on whitespace. Every non-alphanumeric
    character is then removed from each token, and tokens that end up empty
    are ignored.

    Args:
        text: The string to analyse.

    Returns:
        A ``collections.Counter`` mapping each cleaned word to its count.
    """
    tally = Counter()
    for token in text.lower().split():
        # Keep only the alphanumeric characters of the token.
        cleaned = "".join(filter(str.isalnum, token))
        if cleaned:
            tally[cleaned] += 1
    return tally
 
text = """
Python is great and Python is easy.
Learning Python is fun!
"""
 
freq = word_frequency(text)
print("词频统计:")
for word, count in freq.most_common():
    print(f"  {word}: {count}")

示例2：学生成绩管理系统

class StudentManager:
    """In-memory registry of students and their per-subject scores.

    ``students`` maps a student id to a record of the form
    ``{"name": <name>, "scores": {<subject>: <score>}}``.
    """

    def __init__(self):
        # student_id -> {"name": ..., "scores": {subject: score}}
        self.students = {}

    def add_student(self, student_id, name):
        """Register a student with an empty score sheet.

        Any record already stored under ``student_id`` is replaced.
        """
        record = {"name": name, "scores": {}}
        self.students[student_id] = record

    def add_score(self, student_id, subject, score):
        """Store ``score`` for ``subject``; unknown student ids are ignored."""
        if student_id not in self.students:
            return
        sheet = self.students[student_id]["scores"]
        sheet[subject] = score

    def get_average(self, student_id):
        """Return the mean of a student's scores.

        Returns:
            ``None`` if ``student_id`` is unknown, ``0`` if the student has
            no scores yet, otherwise the arithmetic mean of all scores.
        """
        if student_id not in self.students:
            return None
        marks = self.students[student_id]["scores"].values()
        if not marks:
            return 0
        return sum(marks) / len(marks)

    def get_top_students(self, n=3):
        """Return up to ``n`` ``(name, average)`` pairs, highest average first."""
        ranking = []
        for sid, info in self.students.items():
            mean = self.get_average(sid)
            if mean is None:
                continue
            ranking.append((info["name"], mean))
        # The sort is stable, so students with equal averages keep insertion order.
        ranking.sort(key=lambda entry: entry[1], reverse=True)
        return ranking[:n]
 
# 使用
manager = StudentManager()
manager.add_student("001", "Alice")
manager.add_student("002", "Bob")
manager.add_score("001", "Math", 90)
manager.add_score("001", "English", 85)
manager.add_score("002", "Math", 95)
manager.add_score("002", "English", 88)
 
print("前三名:", manager.get_top_students())

示例3：使用Counter进行数据分析

from collections import Counter
import random
 
# 模拟掷骰子
rolls = [random.randint(1, 6) for _ in range(1000)]
count = Counter(rolls)
 
print("掷骰子统计:")
for number in range(1, 7):
    freq = count[number]
    bar = "█" * (freq // 5)
    print(f"{number}: {bar} {freq}")

4.8 练习题

练习1：列表去重保持顺序

def unique_ordered(seq):
    """Return the items of ``seq`` without duplicates, in first-seen order.

    Args:
        seq: An iterable of hashable items.

    Returns:
        A new list containing the first occurrence of every distinct item.
    """
    seen = set()
    remember = seen.add
    # ``remember`` returns None, so the ``or`` records the item as a side
    # effect only when it has not been seen before.
    return [item for item in seq if not (item in seen or remember(item))]
 
# 或使用dict.fromkeys() (Python 3.7+)
def unique_ordered_v2(seq):
    """Return the distinct items of ``seq`` in first-seen order.

    Relies on dict keys being unique and kept in insertion order.
    """
    return [*dict.fromkeys(seq)]
 
# 测试
print(unique_ordered([3, 1, 2, 1, 3, 2, 4]))  # [3, 1, 2, 4]

练习2：两个字典合并

def merge_dicts(d1, d2):
    """Return a merged copy of ``d1`` and ``d2``; neither input is modified.

    Args:
        d1: The base dictionary.
        d2: The dictionary whose values win when a key exists in both.

    Returns:
        A shallow copy of ``d1`` updated with the items of ``d2``.
    """
    merged = d1.copy()
    merged.update(d2)
    return merged
 
# Python 3.9+ 使用 | 操作符
def merge_dicts_v2(d1, d2):
    """Return ``d1 | d2``.

    For plain dicts this needs Python 3.9+ and yields a new dict in which
    values from ``d2`` win on duplicate keys.
    """
    return d1 | d2
 
# 测试
d1 = {"a": 1, "b": 2}
d2 = {"b": 3, "c": 4}
print(merge_dicts(d1, d2))  # {'a': 1, 'b': 3, 'c': 4}

练习3：找出两个列表的共同元素

def common_elements(list1, list2):
    """Return the distinct elements present in both ``list1`` and ``list2``.

    The result is built from a set intersection, so duplicates are dropped
    and the order of the returned list is not guaranteed.
    """
    shared = set(list1).intersection(set(list2))
    return list(shared)
 
# 保持顺序版本
def common_elements_ordered(list1, list2):
    """Return the items of ``list1`` that also occur in ``list2``.

    The order of ``list1`` is preserved, and an item repeated in ``list1``
    is repeated in the result.
    """
    lookup = set(list2)  # built once so each membership test is O(1)
    return list(filter(lookup.__contains__, list1))
 
# 测试
l1 = [1, 2, 3, 4, 5]
l2 = [4, 5, 6, 7, 8]
print(common_elements(l1, l2))  # [4, 5]

练习4：统计字符类型

def analyze_text(text):
    """Classify every character of ``text`` into exactly one category.

    Args:
        text: The string to analyse.

    Returns:
        A dict with the keys ``"letters"`` (``str.isalpha``), ``"digits"``
        (``str.isdigit``), ``"spaces"`` (``str.isspace``) and ``"others"``
        (everything else). The four counts always sum to ``len(text)``.
    """
    letters = digits = spaces = others = 0
    for ch in text:
        if ch.isalpha():
            letters += 1
        elif ch.isdigit():
            digits += 1
        elif ch.isspace():
            spaces += 1
        else:
            # Includes numeric characters such as '½' that are alphanumeric
            # but neither letters nor digits; testing ``not ch.isalnum()``
            # here would leave them uncounted.
            others += 1
    return {
        "letters": letters,
        "digits": digits,
        "spaces": spaces,
        "others": others,
    }
 
text = "Hello World! 123"
result = analyze_text(text)
for key, value in result.items():
    print(f"{key}: {value}")

本章小结

本章深入学习了Python的核心数据结构：

列表 - 有序可变序列，支持增删改查、排序、切片等操作
元组 - 有序不可变序列，用于保护数据、作为字典键
字典 - 键值对映射，快速查找、嵌套结构
集合 - 无序不重复元素，支持数学集合运算
字符串 - 不可变字符序列，丰富的方法支持

掌握这些数据结构是编写高效Python代码的基础。下一章将学习面向对象编程。

目录

第四章 数据结构