Skip to content

Python 文件与 IO 操作

文件操作是编程中最基础也最常用的功能之一。Python 提供了简洁的文件读写 API 和强大的 pathlib 模块。

1. 文件读写基础

1.1 open() 函数

open() 是文件操作的核心函数,返回一个文件对象。

python
# 语法:open(file, mode='r', encoding=None)
# 必须在使用完毕后关闭文件

# ❌ 不推荐:手动关闭(容易遗忘,异常时可能跳过 close)
f = open("test.txt", "w", encoding="utf-8")
f.write("Hello")
f.close()

# ✅ 推荐:使用 with 语句(自动关闭,即使发生异常也会关闭)
with open("test.txt", "w", encoding="utf-8") as f:
    f.write("Hello")
# 离开 with 块后文件自动关闭

1.2 文件打开模式

模式说明文件不存在时文件存在时
r只读(默认)报错 FileNotFoundError从头读取
w只写创建新文件清空后写入
a追加创建新文件在末尾追加
x排他创建创建新文件报错 FileExistsError
r+读写报错从头读写
w+读写创建新文件清空后读写
a+读写(追加)创建新文件可读,写入在末尾
rb二进制读报错读取字节
wb二进制写创建新文件清空后写入字节
python
# 创建测试文件
with open("demo.txt", "w", encoding="utf-8") as f:
    f.write("第一行\n第二行\n第三行\n")

# 'r' 只读
with open("demo.txt", "r", encoding="utf-8") as f:
    content = f.read()
    print(content)
# 第一行
# 第二行
# 第三行

# 'a' 追加
with open("demo.txt", "a", encoding="utf-8") as f:
    f.write("第四行\n")

# 'x' 排他创建(文件已存在时报错)
try:
    with open("demo.txt", "x", encoding="utf-8") as f:
        f.write("新内容")
except FileExistsError:
    print("文件已存在,无法创建")  # 文件已存在,无法创建

2. 读取文件

2.1 read() —— 读取全部内容

python
# 准备测试文件
with open("sample.txt", "w", encoding="utf-8") as f:
    f.write("Python 是一门优雅的语言\n学习 Python 很有趣\n让我们开始吧")

# 读取全部内容
with open("sample.txt", "r", encoding="utf-8") as f:
    content = f.read()
    print(content)
# Python 是一门优雅的语言
# 学习 Python 很有趣
# 让我们开始吧

print(type(content))  # <class 'str'>
print(len(content))   # 36

# read(n) —— 读取 n 个字符
with open("sample.txt", "r", encoding="utf-8") as f:
    chunk = f.read(10)
    print(chunk)        # Python 是一门
    print(f.read(5))    # 优雅的语言
    rest = f.read()     # 读取剩余全部
    print(rest)
# (换行)学习 Python 很有趣
# 让我们开始吧

2.2 readline() —— 逐行读取

python
with open("sample.txt", "r", encoding="utf-8") as f:
    line1 = f.readline()    # 读取一行(包含 \n)
    line2 = f.readline()
    line3 = f.readline()
    line4 = f.readline()    # 文件末尾返回空字符串

    print(repr(line1))  # 'Python 是一门优雅的语言\n'
    print(repr(line2))  # '学习 Python 很有趣\n'
    print(repr(line3))  # '让我们开始吧'
    print(repr(line4))  # ''  (文件末尾)

2.3 readlines() —— 读取所有行

python
with open("sample.txt", "r", encoding="utf-8") as f:
    lines = f.readlines()   # 返回列表,每个元素是一行

print(lines)
# ['Python 是一门优雅的语言\n', '学习 Python 很有趣\n', '让我们开始吧']

# 去除每行的换行符
lines = [line.strip() for line in lines]
print(lines)
# ['Python 是一门优雅的语言', '学习 Python 很有趣', '让我们开始吧']

2.4 遍历文件(推荐方式)

python
# ✅ 最推荐:直接遍历文件对象(惰性读取,内存友好)
with open("sample.txt", "r", encoding="utf-8") as f:
    for line in f:
        print(line.strip())
# Python 是一门优雅的语言
# 学习 Python 很有趣
# 让我们开始吧

# 带行号遍历
with open("sample.txt", "r", encoding="utf-8") as f:
    for i, line in enumerate(f, start=1):
        print(f"第{i}行:{line.strip()}")
# 第1行:Python 是一门优雅的语言
# 第2行:学习 Python 很有趣
# 第3行:让我们开始吧

3. 写入文件

3.1 write() —— 写入字符串

python
# 写入(覆盖)
with open("output.txt", "w", encoding="utf-8") as f:
    f.write("Hello, Python!\n")
    f.write("文件写入示例\n")

    # write 返回写入的字符数
    count = f.write("第三行\n")
    print(f"写入了 {count} 个字符")  # 写入了 4 个字符

# 验证内容
with open("output.txt", "r", encoding="utf-8") as f:
    print(f.read())
# Hello, Python!
# 文件写入示例
# 第三行

3.2 writelines() —— 写入多行

python
lines = ["苹果\n", "香蕉\n", "樱桃\n"]

with open("fruits.txt", "w", encoding="utf-8") as f:
    f.writelines(lines)
    # ⚠️ writelines 不会自动添加换行符,需要自己加 \n

# 从列表写入(不含 \n 的列表)
items = ["Python", "Java", "Go", "Rust"]
with open("languages.txt", "w", encoding="utf-8") as f:
    f.writelines(item + "\n" for item in items)

# 也可以用 join + write
with open("languages2.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(items))

with open("languages.txt", "r", encoding="utf-8") as f:
    print(f.read())
# Python
# Java
# Go
# Rust

3.3 追加写入

python
# 'a' 模式:在文件末尾追加
with open("log.txt", "w", encoding="utf-8") as f:
    f.write("[2024-01-01] 应用启动\n")

with open("log.txt", "a", encoding="utf-8") as f:
    f.write("[2024-01-01] 用户登录\n")
    f.write("[2024-01-01] 数据处理完成\n")

with open("log.txt", "r", encoding="utf-8") as f:
    print(f.read())
# [2024-01-01] 应用启动
# [2024-01-01] 用户登录
# [2024-01-01] 数据处理完成

3.4 print() 写入文件

python
with open("print_output.txt", "w", encoding="utf-8") as f:
    print("Hello, World!", file=f)
    print("数字:", 42, file=f)
    print("列表:", [1, 2, 3], file=f)

with open("print_output.txt", "r", encoding="utf-8") as f:
    print(f.read())
# Hello, World!
# 数字: 42
# 列表: [1, 2, 3]

4. 文件指针操作

4.1 tell() 和 seek()

python
with open("sample.txt", "r", encoding="utf-8") as f:
    # tell() 返回当前指针位置(字节偏移量)
    print(f.tell())       # 0  (文件开头)

    f.read(6)             # 读取 6 个字符(这里 "Python" 全是 ASCII,各占 1 字节)
    print(f.tell())       # 6 或其他值(文本模式下 tell 返回不透明偏移;中文每字符占 3 字节)

    # seek(offset, whence) 移动指针
    # whence: 0=文件开头(默认),1=当前位置,2=文件末尾
    f.seek(0)             # 回到文件开头
    print(f.tell())       # 0
    print(f.readline().strip())  # Python 是一门优雅的语言

# 实际应用:读取文件最后几行
with open("sample.txt", "rb") as f:  # 二进制模式才能从末尾 seek
    f.seek(0, 2)         # 移到文件末尾
    file_size = f.tell()
    print(f"文件大小:{file_size} 字节")

4.2 文件对象的常用属性

python
with open("sample.txt", "r", encoding="utf-8") as f:
    print(f.name)      # sample.txt    文件名
    print(f.mode)      # r             打开模式
    print(f.encoding)  # utf-8         编码
    print(f.closed)    # False         是否已关闭

print(f.closed)        # True          离开 with 后自动关闭

5. 大文件处理

5.1 逐行处理(内存友好)

python
# 创建一个较大的测试文件
with open("big_file.txt", "w", encoding="utf-8") as f:
    for i in range(10000):
        f.write(f"这是第 {i+1} 行数据\n")

# ❌ 不推荐:一次性读取全部(大文件会耗尽内存)
# content = open("big_file.txt").read()

# ✅ 推荐:逐行读取
line_count = 0
with open("big_file.txt", "r", encoding="utf-8") as f:
    for line in f:   # 每次只加载一行到内存
        line_count += 1
print(f"共 {line_count} 行")  # 共 10000 行

5.2 分块读取二进制文件

python
def copy_file(src, dst, chunk_size=8192):
    """Copy *src* to *dst* in fixed-size chunks and return bytes copied.

    Reading chunk-by-chunk keeps memory bounded, so the function works
    for files much larger than available RAM.

    Args:
        src: source file path (opened in binary mode).
        dst: destination file path (created/truncated in binary mode).
        chunk_size: bytes read per iteration (default 8 KB).

    Returns:
        Total number of bytes written to *dst*.
    """
    total = 0
    with open(src, "rb") as reader, open(dst, "wb") as writer:
        # read() returns b"" at EOF, which ends the walrus loop
        while chunk := reader.read(chunk_size):
            writer.write(chunk)
            total += len(chunk)
    return total

# 测试
size = copy_file("big_file.txt", "big_file_copy.txt")
print(f"复制了 {size} 字节")  # 复制了 248894 字节

import os
os.remove("big_file_copy.txt")

5.3 使用 iter() + sentinel 读取

python
# 用 iter 的双参数形式:iter(callable, sentinel)
# callable 每次被调用返回一个值,直到返回 sentinel 时停止

with open("big_file.txt", "rb") as f:
    chunks = []
    for chunk in iter(lambda: f.read(4096), b""):  # 读到空字节停止
        chunks.append(chunk)

total = sum(len(c) for c in chunks)
print(f"分 {len(chunks)} 块读取,共 {total} 字节")
# 分 61 块读取,共 248894 字节

os.remove("big_file.txt")

6. 二进制文件操作

6.1 读写二进制数据

python
# 写入二进制数据
data = bytes(range(256))  # 0x00 到 0xFF

with open("binary.dat", "wb") as f:
    f.write(data)

# 读取二进制数据
with open("binary.dat", "rb") as f:
    content = f.read()
    print(type(content))      # <class 'bytes'>
    print(len(content))       # 256
    print(content[:10])       # b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09'
    print(list(content[:5]))  # [0, 1, 2, 3, 4]

import os
os.remove("binary.dat")

6.2 struct —— 处理二进制结构体

python
import struct

# 打包:Python 值 → 二进制字节
# 格式:i=int(4字节)  f=float(4字节)  d=double(8字节)  s=char[]
packed = struct.pack("ifd", 42, 3.14, 2.718)
print(packed)       # b'*\x00\x00\x00\xc3\xf5H@\xb6\xf3\xfdT\xfb\xbf\x05@'
print(len(packed))  # 16

# 解包:二进制字节 → Python 值
values = struct.unpack("ifd", packed)
print(values)  # (42, 3.140000104904175, 2.718)

# 实用示例:读写自定义二进制格式(如图片头部)
header_format = "4sII"  # 4字节标识 + 2个无符号整数(宽、高)
header = struct.pack(header_format, b"IMG\x00", 1920, 1080)
print(len(header))  # 12

magic, width, height = struct.unpack(header_format, header)
print(f"标识:{magic},尺寸:{width}x{height}")
# 标识:b'IMG\x00',尺寸:1920x1080

# 常用格式字符
# b/B  int8/uint8    h/H  int16/uint16    i/I  int32/uint32
# q/Q  int64/uint64  f    float32          d   float64
# s    char[]        ?    bool
# <    小端序        >    大端序            !   网络序(大端)

7. pathlib —— 现代路径操作

7.1 Path 对象基础

python
from pathlib import Path

# 创建路径
p = Path(".")                    # 当前目录
home = Path.home()               # 用户主目录
absolute = Path("/usr/local/bin")

# 路径拼接(/ 运算符)
config = home / ".config" / "app" / "config.json"
print(config)  # /home/user/.config/app/config.json(Linux)

# 路径属性
p = Path("/home/user/projects/report.csv")
print(p.name)       # report.csv      文件全名
print(p.stem)       # report           文件名(无扩展名)
print(p.suffix)     # .csv             扩展名
print(p.suffixes)   # ['.csv']         所有扩展名
print(p.parent)     # /home/user/projects  父目录
print(p.parents[0]) # /home/user/projects
print(p.parents[1]) # /home/user
print(p.parts)      # ('/', 'home', 'user', 'projects', 'report.csv')
print(p.anchor)     # /                根

# 多重扩展名
p = Path("archive.tar.gz")
print(p.suffixes)    # ['.tar', '.gz']
print(p.stem)        # archive.tar
# 获取真正的文件名(去除所有扩展名)
print(p.name.split(".")[0])  # archive

7.2 路径判断与信息

python
from pathlib import Path

p = Path(".")

# 判断
print(p.exists())       # True
print(p.is_dir())       # True
print(p.is_file())      # False
print(p.is_absolute())  # False

# 转换
print(p.resolve())      # /home/user/project  (绝对路径)
print(p.absolute())     # /home/user/project

# 文件信息
sample = Path("sample.txt")
if sample.exists():
    stat = sample.stat()
    print(f"大小:{stat.st_size} 字节")
    print(f"修改时间戳:{stat.st_mtime}")

    # 格式化修改时间
    from datetime import datetime
    mtime = datetime.fromtimestamp(stat.st_mtime)
    print(f"修改时间:{mtime:%Y-%m-%d %H:%M:%S}")

7.3 文件读写

python
from pathlib import Path

p = Path("pathlib_demo.txt")

# 写入文本
p.write_text("Hello, pathlib!\n你好,世界!", encoding="utf-8")

# 读取文本
content = p.read_text(encoding="utf-8")
print(content)
# Hello, pathlib!
# 你好,世界!

# 写入二进制
bp = Path("binary_demo.dat")
bp.write_bytes(b"\x00\x01\x02\x03")

# 读取二进制
data = bp.read_bytes()
print(data)  # b'\x00\x01\x02\x03'

# 清理
p.unlink()
bp.unlink()

7.4 目录操作

python
from pathlib import Path

# 创建目录
Path("test_dir/sub1/sub2").mkdir(parents=True, exist_ok=True)
Path("test_dir/sub1/a.txt").write_text("aaa", encoding="utf-8")
Path("test_dir/sub1/b.py").write_text("bbb", encoding="utf-8")
Path("test_dir/sub1/sub2/c.txt").write_text("ccc", encoding="utf-8")
Path("test_dir/d.json").write_text("{}", encoding="utf-8")

# 列出直接子项
print("=== iterdir ===")
for item in Path("test_dir").iterdir():
    kind = "目录" if item.is_dir() else "文件"
    print(f"  [{kind}] {item.name}")
# === iterdir ===
#   [目录] sub1
#   [文件] d.json

# glob 模式匹配(当前目录层级)
print("=== glob *.txt ===")
for f in Path("test_dir/sub1").glob("*.txt"):
    print(f"  {f}")
# === glob *.txt ===
#   test_dir/sub1/a.txt

# rglob 递归匹配(所有层级)
print("=== rglob *.txt ===")
for f in Path("test_dir").rglob("*.txt"):
    print(f"  {f}")
# === rglob *.txt ===
#   test_dir/sub1/a.txt
#   test_dir/sub1/sub2/c.txt

# 常用 glob 模式
# *.py        当前目录所有 .py 文件
# **/*.py     所有层级的 .py 文件(等同于 rglob)
# data_*      以 data_ 开头的文件
# *.[jt]s     .js 或 .ts 文件

# 清理(递归删除需要 shutil)
import shutil
shutil.rmtree("test_dir")

7.5 路径修改

python
from pathlib import Path

p = Path("/home/user/data/report.csv")

# 替换文件名
print(p.with_name("summary.csv"))      # /home/user/data/summary.csv

# 替换扩展名
print(p.with_suffix(".json"))          # /home/user/data/report.json
print(p.with_suffix(""))               # /home/user/data/report

# 替换文件名(不含扩展名)
print(p.with_stem("output"))           # /home/user/data/output.csv  (Python 3.9+)

# 相对路径
base = Path("/home/user")
print(p.relative_to(base))            # data/report.csv

# 拼接
print(p.parent / "backup" / p.name)   # /home/user/data/backup/report.csv

8. JSON 文件操作

8.1 读写 JSON

python
import json
from pathlib import Path

# 写入 JSON
data = {
    "users": [
        {"name": "Alice", "age": 25, "skills": ["Python", "SQL"]},
        {"name": "Bob", "age": 30, "skills": ["Java", "Go"]},
    ],
    "total": 2,
    "active": True
}

with open("users.json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)

# 读取 JSON
with open("users.json", "r", encoding="utf-8") as f:
    loaded = json.load(f)

print(loaded["users"][0]["name"])    # Alice
print(loaded["users"][1]["skills"])  # ['Java', 'Go']
print(loaded["total"])               # 2

Path("users.json").unlink()

8.2 处理特殊类型

python
import json
from datetime import datetime, date
from pathlib import Path

# JSON 不支持 datetime、set、bytes 等类型
# 需要自定义编码器

class CustomEncoder(json.JSONEncoder):
    """JSON encoder that also serializes datetime/date, set and bytes.

    Conversions:
        datetime / date -> ISO-8601 string
        set             -> list (element order is arbitrary)
        bytes           -> lowercase hex string
    Anything else falls through to the base class, which raises TypeError.
    """

    # (type(s), converter) pairs tried in order
    _CONVERTERS = (
        ((datetime, date), lambda o: o.isoformat()),
        (set, list),
        (bytes, bytes.hex),
    )

    def default(self, obj):
        for types, convert in self._CONVERTERS:
            if isinstance(obj, types):
                return convert(obj)
        # unknown type: let JSONEncoder raise the standard TypeError
        return super().default(obj)

data = {
    "created": datetime(2024, 6, 15, 10, 30),
    "tags": {"python", "tutorial"},
    "token": b"\xab\xcd\xef",
}

json_str = json.dumps(data, cls=CustomEncoder, ensure_ascii=False)
print(json_str)
# {"created": "2024-06-15T10:30:00", "tags": ["tutorial", "python"], "token": "abcdef"}

9. CSV 文件操作

9.1 读写 CSV

python
import csv
from pathlib import Path

# 写入 CSV
data = [
    ["姓名", "年龄", "城市"],
    ["Alice", 25, "Beijing"],
    ["Bob", 30, "Shanghai"],
    ["Charlie", 28, "Shenzhen"],
]

with open("people.csv", "w", encoding="utf-8", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(data)   # 写入所有行
    # 也可以逐行写入:writer.writerow(["David", 35, "Guangzhou"])

# 读取 CSV
with open("people.csv", "r", encoding="utf-8") as f:
    reader = csv.reader(f)
    header = next(reader)    # 读取表头
    print(f"表头:{header}")  # 表头:['姓名', '年龄', '城市']
    for row in reader:
        print(f"  {row[0]},{row[1]}岁,{row[2]}")
#   Alice,25岁,Beijing
#   Bob,30岁,Shanghai
#   Charlie,28岁,Shenzhen

9.2 DictReader / DictWriter

python
import csv

# DictWriter —— 用字典写入(更直观)
fields = ["name", "age", "city"]
users = [
    {"name": "Alice", "age": 25, "city": "Beijing"},
    {"name": "Bob", "age": 30, "city": "Shanghai"},
]

with open("users.csv", "w", encoding="utf-8", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=fields)
    writer.writeheader()      # 写入表头
    writer.writerows(users)   # 写入所有行

# DictReader —— 读取为字典
with open("users.csv", "r", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    print(f"字段:{reader.fieldnames}")  # 字段:['name', 'age', 'city']
    for row in reader:
        print(f"  {row['name']} - {row['city']}")
#   Alice - Beijing
#   Bob - Shanghai

from pathlib import Path
Path("people.csv").unlink()
Path("users.csv").unlink()

10. 其他文件格式

10.1 pickle —— Python 对象序列化

python
import pickle

# pickle 可以保存几乎任何 Python 对象(但只有 Python 能读取)
data = {
    "list": [1, 2, 3],
    "tuple": (4, 5),
    "set": {6, 7, 8},
    "nested": {"a": [1, 2]},
}

# 序列化(写入)
with open("data.pkl", "wb") as f:   # 必须用二进制模式
    pickle.dump(data, f)

# 反序列化(读取)
with open("data.pkl", "rb") as f:
    loaded = pickle.load(f)

print(loaded)
# {'list': [1, 2, 3], 'tuple': (4, 5), 'set': {8, 6, 7}, 'nested': {'a': [1, 2]}}
print(loaded == data)  # True

# 序列化为字节串(不写入文件)
raw = pickle.dumps(data)
print(type(raw))  # <class 'bytes'>

restored = pickle.loads(raw)
print(restored == data)  # True

# ⚠️ 安全警告:永远不要 pickle.load 不信任的数据!
# pickle 可以执行任意代码,只加载你自己创建的 .pkl 文件

import os
os.remove("data.pkl")

10.2 INI / TOML 配置文件

python
# ===== configparser:处理 INI 文件 =====
import configparser

# 写入 INI
config = configparser.ConfigParser()
config["DEFAULT"] = {"debug": "false"}
config["server"] = {"host": "127.0.0.1", "port": "8000"}
config["database"] = {"url": "sqlite:///app.db", "pool_size": "5"}

with open("config.ini", "w") as f:
    config.write(f)

# 读取 INI
config = configparser.ConfigParser()
config.read("config.ini")

print(config["server"]["host"])          # 127.0.0.1
print(config["server"]["port"])          # 8000
print(config["database"]["url"])         # sqlite:///app.db
print(config["server"]["debug"])         # false  (来自 DEFAULT)
print(config.getint("server", "port"))   # 8000   (转为 int)
print(config.getboolean("server", "debug"))  # False

# 遍历
for section in config.sections():
    print(f"[{section}]")
    for key, value in config[section].items():
        print(f"  {key} = {value}")
# [server]
#   host = 127.0.0.1
#   port = 8000
#   debug = false
# [database]
#   url = sqlite:///app.db
#   pool_size = 5
#   debug = false

# ===== tomllib:处理 TOML 文件(Python 3.11+)=====
import tomllib
from pathlib import Path

# 写入 TOML(标准库只有读取,写入需要第三方库 tomli-w)
toml_content = """
[project]
name = "myapp"
version = "1.0.0"

[server]
host = "127.0.0.1"
port = 8000
debug = false

[database]
url = "sqlite:///app.db"
"""

Path("config.toml").write_text(toml_content)

# 读取 TOML
with open("config.toml", "rb") as f:
    toml_data = tomllib.load(f)

print(toml_data["project"]["name"])     # myapp
print(toml_data["server"]["port"])      # 8000  (直接是 int)
print(toml_data["server"]["debug"])     # False  (直接是 bool)

import os
os.remove("config.ini")
os.remove("config.toml")

11. 编码处理

11.1 常见编码问题

python
# UTF-8:国际通用,推荐使用
# GBK/GB2312:中文 Windows 默认编码
# Latin-1/ISO-8859-1:西欧编码

# 写入 UTF-8
with open("utf8.txt", "w", encoding="utf-8") as f:
    f.write("你好,世界!")

# 用错误编码读取会乱码或报错
try:
    with open("utf8.txt", "r", encoding="gbk") as f:
        print(f.read())
except UnicodeDecodeError as e:
    print(f"解码错误:{e}")
# 解码错误:'gbk' codec can't decode byte ...

# 正确读取
with open("utf8.txt", "r", encoding="utf-8") as f:
    print(f.read())  # 你好,世界!

11.2 处理编码错误

python
# errors 参数控制编码错误的处理方式

# errors='strict'(默认)—— 报错
# errors='ignore' —— 忽略无法解码的字节
# errors='replace' —— 解码时用替换字符 �(U+FFFD)替换(编码时才用 ?)
# errors='backslashreplace' —— 用转义序列替换

# 写入一个 GBK 编码的文件用于测试
with open("gbk.txt", "w", encoding="gbk") as f:
    f.write("你好世界")

# 用 UTF-8 读取 GBK 文件
with open("gbk.txt", "r", encoding="utf-8", errors="replace") as f:
    content = f.read()
    print(content)  # ���ã����(无法解码的字节被替换为 � U+FFFD)

with open("gbk.txt", "r", encoding="utf-8", errors="ignore") as f:
    content = f.read()
    print(repr(content))  # 忽略无法解码的字节

# 正确方式:用 GBK 解码
with open("gbk.txt", "r", encoding="gbk") as f:
    print(f.read())  # 你好世界

import os
os.remove("utf8.txt")
os.remove("gbk.txt")

11.3 自动检测编码

python
# 使用第三方库 chardet 检测编码
# pip install chardet

# import chardet
#
# with open("unknown.txt", "rb") as f:
#     raw = f.read()
#     result = chardet.detect(raw)
#     print(result)
#     # {'encoding': 'utf-8', 'confidence': 0.99, 'language': ''}
#
#     content = raw.decode(result["encoding"])
#     print(content)

12. shutil —— 高级文件操作

python
import shutil
from pathlib import Path

# 准备测试目录
Path("src_dir/sub").mkdir(parents=True, exist_ok=True)
Path("src_dir/a.txt").write_text("aaa", encoding="utf-8")
Path("src_dir/b.txt").write_text("bbb", encoding="utf-8")
Path("src_dir/sub/c.txt").write_text("ccc", encoding="utf-8")

# 复制文件
shutil.copy("src_dir/a.txt", "a_copy.txt")            # 复制文件
shutil.copy2("src_dir/b.txt", "b_copy.txt")           # 复制文件(保留元数据)

# 复制整个目录树
shutil.copytree("src_dir", "dst_dir")
print(list(Path("dst_dir").rglob("*")))
# [Path('dst_dir/a.txt'), Path('dst_dir/b.txt'), Path('dst_dir/sub'), Path('dst_dir/sub/c.txt')]

# 移动文件/目录
shutil.move("a_copy.txt", "dst_dir/a_moved.txt")

# 删除整个目录树
shutil.rmtree("dst_dir")

# 磁盘使用情况
usage = shutil.disk_usage(".")
print(f"总计:{usage.total / (1024**3):.1f} GB")
print(f"已用:{usage.used / (1024**3):.1f} GB")
print(f"可用:{usage.free / (1024**3):.1f} GB")

# 创建压缩文件
shutil.make_archive("backup", "zip", "src_dir")
print(Path("backup.zip").stat().st_size, "字节")

# 解压
shutil.unpack_archive("backup.zip", "restored_dir")
print(list(Path("restored_dir").rglob("*.txt")))
# [Path('restored_dir/a.txt'), Path('restored_dir/b.txt'), Path('restored_dir/sub/c.txt')]

# 清理
shutil.rmtree("src_dir")
shutil.rmtree("restored_dir")
Path("b_copy.txt").unlink()
Path("backup.zip").unlink()

13. 临时文件

python
import tempfile
from pathlib import Path

# 临时文件(自动删除)
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt",
                                  delete=False, encoding="utf-8") as f:
    f.write("临时数据")
    temp_path = f.name
    print(f"临时文件:{temp_path}")
    # 临时文件:/tmp/tmpxxxxxxx.txt(Linux)

# 文件在 with 块外仍然存在(delete=False)
content = Path(temp_path).read_text(encoding="utf-8")
print(content)  # 临时数据
Path(temp_path).unlink()  # 手动删除

# 临时目录(自动删除)
with tempfile.TemporaryDirectory() as tmpdir:
    print(f"临时目录:{tmpdir}")
    # 在临时目录中操作
    temp_file = Path(tmpdir) / "test.txt"
    temp_file.write_text("hello", encoding="utf-8")
    print(temp_file.read_text(encoding="utf-8"))  # hello
# 离开 with 后临时目录及其内容自动删除

# 获取系统临时目录
print(tempfile.gettempdir())  # /tmp(Linux)或 C:\Users\...\Temp(Windows)

14. 综合示例

示例 1:简易日志系统

python
from pathlib import Path
from datetime import datetime

class SimpleLogger:
    """Minimal size-rotating file logger.

    Appends timestamped lines to *filepath*; once the file exceeds
    *max_size* bytes it is rotated to a single ``*.old.log`` backup
    (the previous backup, if any, is overwritten).
    """

    def __init__(self, filepath, max_size=1024 * 1024):
        self.filepath = Path(filepath)
        self.max_size = max_size  # rotation threshold in bytes (default 1 MB)
        # Ensure the log directory exists so the first write cannot fail.
        self.filepath.parent.mkdir(parents=True, exist_ok=True)

    def _rotate(self):
        """Move the log aside as *.old.log when it exceeds max_size."""
        if self.filepath.exists() and self.filepath.stat().st_size > self.max_size:
            backup = self.filepath.with_suffix(".old.log")
            # replace() overwrites an existing backup atomically where the OS
            # allows; rename() would raise FileExistsError on Windows once a
            # backup already exists, breaking every later rotation.
            self.filepath.replace(backup)

    def log(self, level, message):
        """Append one '[timestamp] [LEVEL] message' line, rotating first."""
        self._rotate()
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        line = f"[{timestamp}] [{level.upper()}] {message}\n"
        with open(self.filepath, "a", encoding="utf-8") as f:
            f.write(line)

    def info(self, msg):
        """Log *msg* at INFO level."""
        self.log("INFO", msg)

    def error(self, msg):
        """Log *msg* at ERROR level."""
        self.log("ERROR", msg)

    def read_recent(self, n=10):
        """Return the last *n* log lines (without newlines); [] if no file."""
        if not self.filepath.exists():
            return []
        lines = self.filepath.read_text(encoding="utf-8").splitlines()
        return lines[-n:]

# 使用
logger = SimpleLogger("logs/app.log")
logger.info("应用启动")
logger.info("用户 Alice 登录")
logger.error("数据库连接失败")

for line in logger.read_recent():
    print(line)
# [2026-03-18 14:30:00] [INFO] 应用启动
# [2026-03-18 14:30:00] [INFO] 用户 Alice 登录
# [2026-03-18 14:30:00] [ERROR] 数据库连接失败

import shutil
shutil.rmtree("logs")

示例 2:文件批量处理工具

python
from pathlib import Path
import shutil

def batch_rename(directory, pattern, old_ext, new_ext, dry_run=True):
    """Batch-change the extension of files matching ``pattern + old_ext``.

    Args:
        directory: directory to scan (non-recursive).
        pattern: glob stem pattern, e.g. "*" or "photo_*".
        old_ext: current extension including the dot, e.g. ".jpeg".
        new_ext: replacement extension including the dot, e.g. ".jpg".
        dry_run: when True (default), only print a preview — no renames.
    """
    dir_path = Path(directory)
    files = list(dir_path.glob(f"{pattern}{old_ext}"))

    if not files:
        print("没有找到匹配的文件")
        return

    for f in files:
        new_name = f.with_suffix(new_ext)
        if dry_run:
            # fix: the " → " separator between old and new names was missing
            print(f"  [预览] {f.name} → {new_name.name}")
        else:
            f.rename(new_name)
            print(f"  [完成] {f.name} → {new_name.name}")

    print(f"共 {len(files)} 个文件{'(预览模式)' if dry_run else ''}")

# 准备测试
test_dir = Path("rename_test")
test_dir.mkdir(exist_ok=True)
for i in range(5):
    (test_dir / f"photo_{i}.jpeg").write_text("")

# 预览
batch_rename("rename_test", "*", ".jpeg", ".jpg", dry_run=True)
#   [预览] photo_0.jpeg → photo_0.jpg
#   [预览] photo_1.jpeg → photo_1.jpg
#   ...
# 共 5 个文件(预览模式)

# 执行
batch_rename("rename_test", "*", ".jpeg", ".jpg", dry_run=False)
#   [完成] photo_0.jpeg → photo_0.jpg
#   ...

# 验证
print([f.name for f in Path("rename_test").iterdir()])
# ['photo_0.jpg', 'photo_1.jpg', 'photo_2.jpg', 'photo_3.jpg', 'photo_4.jpg']

shutil.rmtree("rename_test")

示例 3:统计代码行数

python
from pathlib import Path

def count_lines(directory, extensions=None):
    """Count lines of code under *directory*, grouped by file extension.

    Args:
        directory: root directory, searched recursively.
        extensions: iterable of extensions (with dot) to include; defaults
            to {".py", ".js", ".ts", ".go", ".java"}.

    Returns:
        (stats, total_files, total_lines) where stats maps each extension
        that matched at least one file to {"files": n, "lines": n}.
        Files that cannot be decoded as UTF-8 or read are skipped silently.
    """
    exts = {".py", ".js", ".ts", ".go", ".java"} if extensions is None else extensions

    stats = {}
    total_files = 0
    total_lines = 0
    root = Path(directory)

    for ext in exts:
        matched = list(root.rglob(f"*{ext}"))
        if not matched:
            continue  # extensions with no hits are omitted from stats
        line_sum = 0
        for path in matched:
            try:
                line_sum += len(path.read_text(encoding="utf-8").splitlines())
            except (UnicodeDecodeError, PermissionError):
                pass  # unreadable file: ignore, keep counting the rest
        stats[ext] = {"files": len(matched), "lines": line_sum}
        total_files += len(matched)
        total_lines += line_sum

    return stats, total_files, total_lines

# 测试(统计当前目录)
# stats, files, lines = count_lines(".")
# print(f"共 {files} 个文件,{lines} 行代码")
# for ext, info in sorted(stats.items(), key=lambda x: -x[1]["lines"]):
#     print(f"  {ext}: {info['files']} 个文件,{info['lines']} 行")

15. 总结

读写操作速查

操作代码说明
读全部f.read()返回字符串
读一行f.readline()返回一行,包含 \n
读所有行f.readlines()返回列表
逐行遍历for line in f:内存友好
写字符串f.write(s)返回字符数
写多行f.writelines(lst)不自动加 \n
print 写入print(x, file=f)方便格式化

常用模块对照

任务推荐模块
路径操作pathlib(推荐) > os.path
文件复制/移动/删除shutil
JSONjson
CSVcsv
INI 配置configparser
TOML 配置tomllib(Python 3.11+)
序列化 Python 对象pickle(仅限可信数据)
二进制结构struct
临时文件tempfile
编码检测chardet(第三方)

最佳实践

python
# 1. 始终使用 with 语句操作文件
with open("file.txt", "r", encoding="utf-8") as f:
    content = f.read()

# 2. 始终指定 encoding 参数(避免平台差异)
# Windows 默认 GBK,Linux/macOS 默认 UTF-8

# 3. 优先使用 pathlib 而非 os.path
from pathlib import Path
p = Path("data") / "output.json"   # 比 os.path.join 更直观

# 4. 大文件逐行或分块处理
for line in open("big.txt", encoding="utf-8"):
    process(line)

# 5. CSV 写入时加 newline=""(Windows 下防止空行)
with open("data.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)

# 6. 二进制文件用 "rb"/"wb",不要指定 encoding
with open("image.png", "rb") as f:
    data = f.read()

Released under the MIT License.