Appearance
Python 文件与 IO 操作
文件操作是编程中最基础也最常用的功能之一。Python 提供了简洁的文件读写 API 和强大的 pathlib 模块。
1. 文件读写基础
1.1 open() 函数
open() 是文件操作的核心函数,返回一个文件对象。
python
# 语法:open(file, mode='r', encoding=None)
# 必须在使用完毕后关闭文件
# ❌ 不推荐:手动关闭(容易遗忘,异常时可能跳过 close)
f = open("test.txt", "w", encoding="utf-8")
f.write("Hello")
f.close()
# ✅ 推荐:使用 with 语句(自动关闭,即使发生异常也会关闭)
with open("test.txt", "w", encoding="utf-8") as f:
f.write("Hello")
# 离开 with 块后文件自动关闭
1.2 文件打开模式
| 模式 | 说明 | 文件不存在时 | 文件存在时 |
|---|---|---|---|
r | 只读(默认) | 报错 FileNotFoundError | 从头读取 |
w | 只写 | 创建新文件 | 清空后写入 |
a | 追加 | 创建新文件 | 在末尾追加 |
x | 排他创建 | 创建新文件 | 报错 FileExistsError |
r+ | 读写 | 报错 | 从头读写 |
w+ | 读写 | 创建新文件 | 清空后读写 |
a+ | 读追加 | 创建新文件 | 可读,写入在末尾 |
rb | 二进制读 | 报错 | 读取字节 |
wb | 二进制写 | 创建新文件 | 清空后写入字节 |
python
# 创建测试文件
with open("demo.txt", "w", encoding="utf-8") as f:
f.write("第一行\n第二行\n第三行\n")
# 'r' 只读
with open("demo.txt", "r", encoding="utf-8") as f:
content = f.read()
print(content)
# 第一行
# 第二行
# 第三行
# 'a' 追加
with open("demo.txt", "a", encoding="utf-8") as f:
f.write("第四行\n")
# 'x' 排他创建(文件已存在时报错)
try:
with open("demo.txt", "x", encoding="utf-8") as f:
f.write("新内容")
except FileExistsError:
print("文件已存在,无法创建") # 文件已存在,无法创建
2. 读取文件
2.1 read() —— 读取全部内容
python
# 准备测试文件
with open("sample.txt", "w", encoding="utf-8") as f:
f.write("Python 是一门优雅的语言\n学习 Python 很有趣\n让我们开始吧")
# 读取全部内容
with open("sample.txt", "r", encoding="utf-8") as f:
content = f.read()
print(content)
# Python 是一门优雅的语言
# 学习 Python 很有趣
# 让我们开始吧
print(type(content)) # <class 'str'>
print(len(content)) # 31
# read(n) —— 读取 n 个字符
with open("sample.txt", "r", encoding="utf-8") as f:
chunk = f.read(10)
print(chunk) # Python 是一门
print(f.read(5)) # 优雅的语言
rest = f.read() # 读取剩余全部
print(rest)
# (换行)学习 Python 很有趣
# 让我们开始吧2.2 readline() —— 逐行读取
python
with open("sample.txt", "r", encoding="utf-8") as f:
line1 = f.readline() # 读取一行(包含 \n)
line2 = f.readline()
line3 = f.readline()
line4 = f.readline() # 文件末尾返回空字符串
print(repr(line1)) # 'Python 是一门优雅的语言\n'
print(repr(line2)) # '学习 Python 很有趣\n'
print(repr(line3)) # '让我们开始吧'
print(repr(line4)) # '' (文件末尾)2.3 readlines() —— 读取所有行
python
with open("sample.txt", "r", encoding="utf-8") as f:
lines = f.readlines() # 返回列表,每个元素是一行
print(lines)
# ['Python 是一门优雅的语言\n', '学习 Python 很有趣\n', '让我们开始吧']
# 去除每行的换行符
lines = [line.strip() for line in lines]
print(lines)
# ['Python 是一门优雅的语言', '学习 Python 很有趣', '让我们开始吧']2.4 遍历文件(推荐方式)
python
# ✅ 最推荐:直接遍历文件对象(惰性读取,内存友好)
with open("sample.txt", "r", encoding="utf-8") as f:
for line in f:
print(line.strip())
# Python 是一门优雅的语言
# 学习 Python 很有趣
# 让我们开始吧
# 带行号遍历
with open("sample.txt", "r", encoding="utf-8") as f:
for i, line in enumerate(f, start=1):
print(f"第{i}行:{line.strip()}")
# 第1行:Python 是一门优雅的语言
# 第2行:学习 Python 很有趣
# 第3行:让我们开始吧
3. 写入文件
3.1 write() —— 写入字符串
python
# 写入(覆盖)
with open("output.txt", "w", encoding="utf-8") as f:
f.write("Hello, Python!\n")
f.write("文件写入示例\n")
# write 返回写入的字符数
count = f.write("第三行\n")
print(f"写入了 {count} 个字符") # 写入了 4 个字符
# 验证内容
with open("output.txt", "r", encoding="utf-8") as f:
print(f.read())
# Hello, Python!
# 文件写入示例
# 第三行3.2 writelines() —— 写入多行
python
lines = ["苹果\n", "香蕉\n", "樱桃\n"]
with open("fruits.txt", "w", encoding="utf-8") as f:
f.writelines(lines)
# ⚠️ writelines 不会自动添加换行符,需要自己加 \n
# 从列表写入(不含 \n 的列表)
items = ["Python", "Java", "Go", "Rust"]
with open("languages.txt", "w", encoding="utf-8") as f:
f.writelines(item + "\n" for item in items)
# 也可以用 join + write
with open("languages2.txt", "w", encoding="utf-8") as f:
f.write("\n".join(items))
with open("languages.txt", "r", encoding="utf-8") as f:
print(f.read())
# Python
# Java
# Go
# Rust3.3 追加写入
python
# 'a' 模式:在文件末尾追加
with open("log.txt", "w", encoding="utf-8") as f:
f.write("[2024-01-01] 应用启动\n")
with open("log.txt", "a", encoding="utf-8") as f:
f.write("[2024-01-01] 用户登录\n")
f.write("[2024-01-01] 数据处理完成\n")
with open("log.txt", "r", encoding="utf-8") as f:
print(f.read())
# [2024-01-01] 应用启动
# [2024-01-01] 用户登录
# [2024-01-01] 数据处理完成3.4 print() 写入文件
python
with open("print_output.txt", "w", encoding="utf-8") as f:
print("Hello, World!", file=f)
print("数字:", 42, file=f)
print("列表:", [1, 2, 3], file=f)
with open("print_output.txt", "r", encoding="utf-8") as f:
print(f.read())
# Hello, World!
# 数字: 42
# 列表: [1, 2, 3]
4. 文件指针操作
4.1 tell() 和 seek()
python
with open("sample.txt", "r", encoding="utf-8") as f:
# tell() 返回当前指针位置(字节偏移量)
print(f.tell()) # 0 (文件开头)
f.read(6) # 读取 6 个字符(UTF-8 中文占 3 字节)
print(f.tell()) # 18 或其他值(取决于具体内容的字节数)
# seek(offset, whence) 移动指针
# whence: 0=文件开头(默认),1=当前位置,2=文件末尾
f.seek(0) # 回到文件开头
print(f.tell()) # 0
print(f.readline().strip()) # Python 是一门优雅的语言
# 实际应用:读取文件最后几行
with open("sample.txt", "rb") as f: # 二进制模式才能从末尾 seek
f.seek(0, 2) # 移到文件末尾
file_size = f.tell()
print(f"文件大小:{file_size} 字节")4.2 文件对象的常用属性
python
with open("sample.txt", "r", encoding="utf-8") as f:
print(f.name) # sample.txt 文件名
print(f.mode) # r 打开模式
print(f.encoding) # utf-8 编码
print(f.closed) # False 是否已关闭
print(f.closed) # True 离开 with 后自动关闭
5. 大文件处理
5.1 逐行处理(内存友好)
python
# 创建一个较大的测试文件
with open("big_file.txt", "w", encoding="utf-8") as f:
for i in range(10000):
f.write(f"这是第 {i+1} 行数据\n")
# ❌ 不推荐:一次性读取全部(大文件会耗尽内存)
# content = open("big_file.txt").read()
# ✅ 推荐:逐行读取
line_count = 0
with open("big_file.txt", "r", encoding="utf-8") as f:
for line in f: # 每次只加载一行到内存
line_count += 1
print(f"共 {line_count} 行") # 共 10000 行5.2 分块读取二进制文件
python
def copy_file(src, dst, chunk_size=8192):
    """Copy *src* to *dst* in fixed-size chunks (suited to large files).

    Args:
        src: path of the file to read.
        dst: path of the file to write (created/truncated).
        chunk_size: bytes read per iteration; 8 KiB by default.

    Returns:
        The total number of bytes copied.
    """
    total = 0
    # Binary mode on both ends: no decoding, the bytes pass through verbatim.
    with open(src, "rb") as reader, open(dst, "wb") as writer:
        # Walrus loop: read until read() returns b"" at end of file.
        while block := reader.read(chunk_size):
            writer.write(block)
            total += len(block)
    return total
# 测试
size = copy_file("big_file.txt", "big_file_copy.txt")
print(f"复制了 {size} 字节") # 复制了 288890 字节
import os
os.remove("big_file_copy.txt")5.3 使用 iter() + sentinel 读取
python
# 用 iter 的双参数形式:iter(callable, sentinel)
# callable 每次被调用返回一个值,直到返回 sentinel 时停止
with open("big_file.txt", "rb") as f:
chunks = []
for chunk in iter(lambda: f.read(4096), b""): # 读到空字节停止
chunks.append(chunk)
total = sum(len(c) for c in chunks)
print(f"分 {len(chunks)} 块读取,共 {total} 字节")
# 分 71 块读取,共 288890 字节
os.remove("big_file.txt")
6. 二进制文件操作
6.1 读写二进制数据
python
# 写入二进制数据
data = bytes(range(256)) # 0x00 到 0xFF
with open("binary.dat", "wb") as f:
f.write(data)
# 读取二进制数据
with open("binary.dat", "rb") as f:
content = f.read()
print(type(content)) # <class 'bytes'>
print(len(content)) # 256
print(content[:10]) # b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09'
print(list(content[:5])) # [0, 1, 2, 3, 4]
import os
os.remove("binary.dat")6.2 struct —— 处理二进制结构体
python
import struct
# 打包:Python 值 → 二进制字节
# 格式:i=int(4字节) f=float(4字节) d=double(8字节) s=char[]
packed = struct.pack("ifd", 42, 3.14, 2.718)
print(packed) # b'*\x00\x00\x00\xc3\xf5H@\xb6\xf3\xfdT\xfb\xbf\x05@'
print(len(packed)) # 16
# 解包:二进制字节 → Python 值
values = struct.unpack("ifd", packed)
print(values) # (42, 3.140000104904175, 2.718)
# 实用示例:读写自定义二进制格式(如图片头部)
header_format = "4sII" # 4字节标识 + 2个无符号整数(宽、高)
header = struct.pack(header_format, b"IMG\x00", 1920, 1080)
print(len(header)) # 12
magic, width, height = struct.unpack(header_format, header)
print(f"标识:{magic},尺寸:{width}x{height}")
# 标识:b'IMG\x00',尺寸:1920x1080
# 常用格式字符
# b/B int8/uint8 h/H int16/uint16 i/I int32/uint32
# q/Q int64/uint64 f float32 d float64
# s char[] ? bool
# < 小端序 > 大端序 ! 网络序(大端)7. pathlib —— 现代路径操作
7.1 Path 对象基础
python
from pathlib import Path
# 创建路径
p = Path(".") # 当前目录
home = Path.home() # 用户主目录
absolute = Path("/usr/local/bin")
# 路径拼接(/ 运算符)
config = home / ".config" / "app" / "config.json"
print(config) # /home/user/.config/app/config.json(Linux)
# 路径属性
p = Path("/home/user/projects/report.csv")
print(p.name) # report.csv 文件全名
print(p.stem) # report 文件名(无扩展名)
print(p.suffix) # .csv 扩展名
print(p.suffixes) # ['.csv'] 所有扩展名
print(p.parent) # /home/user/projects 父目录
print(p.parents[0]) # /home/user/projects
print(p.parents[1]) # /home/user
print(p.parts) # ('/', 'home', 'user', 'projects', 'report.csv')
print(p.anchor) # / 根
# 多重扩展名
p = Path("archive.tar.gz")
print(p.suffixes) # ['.tar', '.gz']
print(p.stem) # archive.tar
# 获取真正的文件名(去除所有扩展名)
print(p.name.split(".")[0]) # archive7.2 路径判断与信息
python
from pathlib import Path
p = Path(".")
# 判断
print(p.exists()) # True
print(p.is_dir()) # True
print(p.is_file()) # False
print(p.is_absolute()) # False
# 转换
print(p.resolve()) # /home/user/project (绝对路径)
print(p.absolute()) # /home/user/project
# 文件信息
sample = Path("sample.txt")
if sample.exists():
stat = sample.stat()
print(f"大小:{stat.st_size} 字节")
print(f"修改时间戳:{stat.st_mtime}")
# 格式化修改时间
from datetime import datetime
mtime = datetime.fromtimestamp(stat.st_mtime)
print(f"修改时间:{mtime:%Y-%m-%d %H:%M:%S}")7.3 文件读写
python
from pathlib import Path
p = Path("pathlib_demo.txt")
# 写入文本
p.write_text("Hello, pathlib!\n你好,世界!", encoding="utf-8")
# 读取文本
content = p.read_text(encoding="utf-8")
print(content)
# Hello, pathlib!
# 你好,世界!
# 写入二进制
bp = Path("binary_demo.dat")
bp.write_bytes(b"\x00\x01\x02\x03")
# 读取二进制
data = bp.read_bytes()
print(data) # b'\x00\x01\x02\x03'
# 清理
p.unlink()
bp.unlink()7.4 目录操作
python
from pathlib import Path
# 创建目录
Path("test_dir/sub1/sub2").mkdir(parents=True, exist_ok=True)
Path("test_dir/sub1/a.txt").write_text("aaa", encoding="utf-8")
Path("test_dir/sub1/b.py").write_text("bbb", encoding="utf-8")
Path("test_dir/sub1/sub2/c.txt").write_text("ccc", encoding="utf-8")
Path("test_dir/d.json").write_text("{}", encoding="utf-8")
# 列出直接子项
print("=== iterdir ===")
for item in Path("test_dir").iterdir():
kind = "目录" if item.is_dir() else "文件"
print(f" [{kind}] {item.name}")
# === iterdir ===
# [目录] sub1
# [文件] d.json
# glob 模式匹配(当前目录层级)
print("=== glob *.txt ===")
for f in Path("test_dir/sub1").glob("*.txt"):
print(f" {f}")
# === glob *.txt ===
# test_dir/sub1/a.txt
# rglob 递归匹配(所有层级)
print("=== rglob *.txt ===")
for f in Path("test_dir").rglob("*.txt"):
print(f" {f}")
# === rglob *.txt ===
# test_dir/sub1/a.txt
# test_dir/sub1/sub2/c.txt
# 常用 glob 模式
# *.py 当前目录所有 .py 文件
# **/*.py 所有层级的 .py 文件(等同于 rglob)
# data_* 以 data_ 开头的文件
# *.[jt]s .js 或 .ts 文件
# 清理(递归删除需要 shutil)
import shutil
shutil.rmtree("test_dir")7.5 路径修改
python
from pathlib import Path
p = Path("/home/user/data/report.csv")
# 替换文件名
print(p.with_name("summary.csv")) # /home/user/data/summary.csv
# 替换扩展名
print(p.with_suffix(".json")) # /home/user/data/report.json
print(p.with_suffix("")) # /home/user/data/report
# 替换文件名(不含扩展名)
print(p.with_stem("output")) # /home/user/data/output.csv (Python 3.9+)
# 相对路径
base = Path("/home/user")
print(p.relative_to(base)) # data/report.csv
# 拼接
print(p.parent / "backup" / p.name) # /home/user/data/backup/report.csv
8. JSON 文件操作
8.1 读写 JSON
python
import json
from pathlib import Path
# 写入 JSON
data = {
"users": [
{"name": "Alice", "age": 25, "skills": ["Python", "SQL"]},
{"name": "Bob", "age": 30, "skills": ["Java", "Go"]},
],
"total": 2,
"active": True
}
with open("users.json", "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=2)
# 读取 JSON
with open("users.json", "r", encoding="utf-8") as f:
loaded = json.load(f)
print(loaded["users"][0]["name"]) # Alice
print(loaded["users"][1]["skills"]) # ['Java', 'Go']
print(loaded["total"]) # 2
Path("users.json").unlink()8.2 处理特殊类型
python
import json
from datetime import datetime, date
from pathlib import Path
# JSON 不支持 datetime、set、bytes 等类型
# 需要自定义编码器
class CustomEncoder(json.JSONEncoder):
    """JSON encoder extended for types json does not handle natively.

    datetime/date -> ISO-8601 string, set -> list, bytes -> hex string;
    anything else falls through to the base class (which raises TypeError).
    """

    # (type-or-types, converter) pairs checked in order by default().
    _CONVERTERS = (
        ((datetime, date), lambda obj: obj.isoformat()),
        (set, list),
        (bytes, lambda obj: obj.hex()),
    )

    def default(self, obj):
        for kinds, convert in self._CONVERTERS:
            if isinstance(obj, kinds):
                return convert(obj)
        return super().default(obj)
data = {
"created": datetime(2024, 6, 15, 10, 30),
"tags": {"python", "tutorial"},
"token": b"\xab\xcd\xef",
}
json_str = json.dumps(data, cls=CustomEncoder, ensure_ascii=False)
print(json_str)
# {"created": "2024-06-15T10:30:00", "tags": ["tutorial", "python"], "token": "abcdef"}9. CSV 文件操作
9.1 读写 CSV
python
import csv
from pathlib import Path
# 写入 CSV
data = [
["姓名", "年龄", "城市"],
["Alice", 25, "Beijing"],
["Bob", 30, "Shanghai"],
["Charlie", 28, "Shenzhen"],
]
with open("people.csv", "w", encoding="utf-8", newline="") as f:
writer = csv.writer(f)
writer.writerows(data) # 写入所有行
# 也可以逐行写入:writer.writerow(["David", 35, "Guangzhou"])
# 读取 CSV
with open("people.csv", "r", encoding="utf-8") as f:
reader = csv.reader(f)
header = next(reader) # 读取表头
print(f"表头:{header}") # 表头:['姓名', '年龄', '城市']
for row in reader:
print(f" {row[0]},{row[1]}岁,{row[2]}")
# Alice,25岁,Beijing
# Bob,30岁,Shanghai
# Charlie,28岁,Shenzhen9.2 DictReader / DictWriter
python
import csv
# DictWriter —— 用字典写入(更直观)
fields = ["name", "age", "city"]
users = [
{"name": "Alice", "age": 25, "city": "Beijing"},
{"name": "Bob", "age": 30, "city": "Shanghai"},
]
with open("users.csv", "w", encoding="utf-8", newline="") as f:
writer = csv.DictWriter(f, fieldnames=fields)
writer.writeheader() # 写入表头
writer.writerows(users) # 写入所有行
# DictReader —— 读取为字典
with open("users.csv", "r", encoding="utf-8") as f:
reader = csv.DictReader(f)
print(f"字段:{reader.fieldnames}") # 字段:['name', 'age', 'city']
for row in reader:
print(f" {row['name']} - {row['city']}")
# Alice - Beijing
# Bob - Shanghai
from pathlib import Path
Path("people.csv").unlink()
Path("users.csv").unlink()
10. 其他文件格式
10.1 pickle —— Python 对象序列化
python
import pickle
# pickle 可以保存几乎任何 Python 对象(但只有 Python 能读取)
data = {
"list": [1, 2, 3],
"tuple": (4, 5),
"set": {6, 7, 8},
"nested": {"a": [1, 2]},
}
# 序列化(写入)
with open("data.pkl", "wb") as f: # 必须用二进制模式
pickle.dump(data, f)
# 反序列化(读取)
with open("data.pkl", "rb") as f:
loaded = pickle.load(f)
print(loaded)
# {'list': [1, 2, 3], 'tuple': (4, 5), 'set': {8, 6, 7}, 'nested': {'a': [1, 2]}}
print(loaded == data) # True
# 序列化为字节串(不写入文件)
raw = pickle.dumps(data)
print(type(raw)) # <class 'bytes'>
restored = pickle.loads(raw)
print(restored == data) # True
# ⚠️ 安全警告:永远不要 pickle.load 不信任的数据!
# pickle 可以执行任意代码,只加载你自己创建的 .pkl 文件
import os
os.remove("data.pkl")10.2 INI / TOML 配置文件
python
# ===== configparser:处理 INI 文件 =====
import configparser
# 写入 INI
config = configparser.ConfigParser()
config["DEFAULT"] = {"debug": "false"}
config["server"] = {"host": "127.0.0.1", "port": "8000"}
config["database"] = {"url": "sqlite:///app.db", "pool_size": "5"}
with open("config.ini", "w") as f:
config.write(f)
# 读取 INI
config = configparser.ConfigParser()
config.read("config.ini")
print(config["server"]["host"]) # 127.0.0.1
print(config["server"]["port"]) # 8000
print(config["database"]["url"]) # sqlite:///app.db
print(config["server"]["debug"]) # false (来自 DEFAULT)
print(config.getint("server", "port")) # 8000 (转为 int)
print(config.getboolean("server", "debug")) # False
# 遍历
for section in config.sections():
print(f"[{section}]")
for key, value in config[section].items():
print(f" {key} = {value}")
# [server]
# host = 127.0.0.1
# port = 8000
# debug = false
# [database]
# url = sqlite:///app.db
# pool_size = 5
# debug = false
# ===== tomllib:处理 TOML 文件(Python 3.11+)=====
import tomllib
from pathlib import Path
# 写入 TOML(标准库只有读取,写入需要第三方库 tomli-w)
toml_content = """
[project]
name = "myapp"
version = "1.0.0"
[server]
host = "127.0.0.1"
port = 8000
debug = false
[database]
url = "sqlite:///app.db"
"""
Path("config.toml").write_text(toml_content)
# 读取 TOML
with open("config.toml", "rb") as f:
toml_data = tomllib.load(f)
print(toml_data["project"]["name"]) # myapp
print(toml_data["server"]["port"]) # 8000 (直接是 int)
print(toml_data["server"]["debug"]) # False (直接是 bool)
import os
os.remove("config.ini")
os.remove("config.toml")11. 编码处理
11.1 常见编码问题
python
# UTF-8:国际通用,推荐使用
# GBK/GB2312:中文 Windows 默认编码
# Latin-1/ISO-8859-1:西欧编码
# 写入 UTF-8
with open("utf8.txt", "w", encoding="utf-8") as f:
f.write("你好,世界!")
# 用错误编码读取会乱码或报错
try:
with open("utf8.txt", "r", encoding="gbk") as f:
print(f.read())
except UnicodeDecodeError as e:
print(f"解码错误:{e}")
# 解码错误:'gbk' codec can't decode byte ...
# 正确读取
with open("utf8.txt", "r", encoding="utf-8") as f:
print(f.read()) # 你好,世界!11.2 处理编码错误
python
# errors 参数控制编码错误的处理方式
# errors='strict'(默认)—— 报错
# errors='ignore' —— 忽略无法解码的字节
# errors='replace' —— 解码时用替换字符 �(U+FFFD)替换(编码时才是 ?)
# errors='backslashreplace' —— 用转义序列替换
# 写入一个 GBK 编码的文件用于测试
with open("gbk.txt", "w", encoding="gbk") as f:
f.write("你好世界")
# 用 UTF-8 读取 GBK 文件
with open("gbk.txt", "r", encoding="utf-8", errors="replace") as f:
content = f.read()
print(content) # ���ã����(乱码被替换为 ?)
with open("gbk.txt", "r", encoding="utf-8", errors="ignore") as f:
content = f.read()
print(repr(content)) # 忽略无法解码的字节
# 正确方式:用 GBK 解码
with open("gbk.txt", "r", encoding="gbk") as f:
print(f.read()) # 你好世界
import os
os.remove("utf8.txt")
os.remove("gbk.txt")11.3 自动检测编码
python
# 使用第三方库 chardet 检测编码
# pip install chardet
# import chardet
#
# with open("unknown.txt", "rb") as f:
# raw = f.read()
# result = chardet.detect(raw)
# print(result)
# # {'encoding': 'utf-8', 'confidence': 0.99, 'language': ''}
#
# content = raw.decode(result["encoding"])
# print(content)12. shutil —— 高级文件操作
python
import shutil
from pathlib import Path
# 准备测试目录
Path("src_dir/sub").mkdir(parents=True, exist_ok=True)
Path("src_dir/a.txt").write_text("aaa", encoding="utf-8")
Path("src_dir/b.txt").write_text("bbb", encoding="utf-8")
Path("src_dir/sub/c.txt").write_text("ccc", encoding="utf-8")
# 复制文件
shutil.copy("src_dir/a.txt", "a_copy.txt") # 复制文件
shutil.copy2("src_dir/b.txt", "b_copy.txt") # 复制文件(保留元数据)
# 复制整个目录树
shutil.copytree("src_dir", "dst_dir")
print(list(Path("dst_dir").rglob("*")))
# [Path('dst_dir/a.txt'), Path('dst_dir/b.txt'), Path('dst_dir/sub'), Path('dst_dir/sub/c.txt')]
# 移动文件/目录
shutil.move("a_copy.txt", "dst_dir/a_moved.txt")
# 删除整个目录树
shutil.rmtree("dst_dir")
# 磁盘使用情况
usage = shutil.disk_usage(".")
print(f"总计:{usage.total / (1024**3):.1f} GB")
print(f"已用:{usage.used / (1024**3):.1f} GB")
print(f"可用:{usage.free / (1024**3):.1f} GB")
# 创建压缩文件
shutil.make_archive("backup", "zip", "src_dir")
print(Path("backup.zip").stat().st_size, "字节")
# 解压
shutil.unpack_archive("backup.zip", "restored_dir")
print(list(Path("restored_dir").rglob("*.txt")))
# [Path('restored_dir/a.txt'), Path('restored_dir/b.txt'), Path('restored_dir/sub/c.txt')]
# 清理
shutil.rmtree("src_dir")
shutil.rmtree("restored_dir")
Path("b_copy.txt").unlink()
Path("backup.zip").unlink()13. 临时文件
python
import tempfile
from pathlib import Path
# 临时文件(自动删除)
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt",
delete=False, encoding="utf-8") as f:
f.write("临时数据")
temp_path = f.name
print(f"临时文件:{temp_path}")
# 临时文件:/tmp/tmpxxxxxxx.txt(Linux)
# 文件在 with 块外仍然存在(delete=False)
content = Path(temp_path).read_text(encoding="utf-8")
print(content) # 临时数据
Path(temp_path).unlink() # 手动删除
# 临时目录(自动删除)
with tempfile.TemporaryDirectory() as tmpdir:
print(f"临时目录:{tmpdir}")
# 在临时目录中操作
temp_file = Path(tmpdir) / "test.txt"
temp_file.write_text("hello", encoding="utf-8")
print(temp_file.read_text(encoding="utf-8")) # hello
# 离开 with 后临时目录及其内容自动删除
# 获取系统临时目录
print(tempfile.gettempdir()) # /tmp(Linux)或 C:\Users\...\Temp(Windows)14. 综合示例
示例 1:简易日志系统
python
from pathlib import Path
from datetime import datetime
class SimpleLogger:
    """Minimal file-based logger with size-triggered rotation.

    Records are appended as ``[timestamp] [LEVEL] message`` lines; once the
    log file grows past ``max_size`` bytes it is renamed to its ``.old.log``
    sibling and a fresh file is started on the next write.
    """

    def __init__(self, filepath, max_size=1024 * 1024):
        self.filepath = Path(filepath)
        self.max_size = max_size  # rotation threshold in bytes, default 1 MiB
        # Make sure the containing directory exists up front.
        self.filepath.parent.mkdir(parents=True, exist_ok=True)

    def _rotate(self):
        """Rename the log to ``*.old.log`` once it exceeds max_size."""
        if not self.filepath.exists():
            return
        if self.filepath.stat().st_size <= self.max_size:
            return
        self.filepath.rename(self.filepath.with_suffix(".old.log"))

    def log(self, level, message):
        """Append one timestamped record at the given level."""
        self._rotate()
        stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        with open(self.filepath, "a", encoding="utf-8") as handle:
            handle.write(f"[{stamp}] [{level.upper()}] {message}\n")

    def info(self, msg):
        """Shorthand for an INFO-level record."""
        self.log("INFO", msg)

    def error(self, msg):
        """Shorthand for an ERROR-level record."""
        self.log("ERROR", msg)

    def read_recent(self, n=10):
        """Return the last *n* log lines (empty list if no log exists yet)."""
        try:
            text = self.filepath.read_text(encoding="utf-8")
        except FileNotFoundError:
            return []
        return text.splitlines()[-n:]
# 使用
logger = SimpleLogger("logs/app.log")
logger.info("应用启动")
logger.info("用户 Alice 登录")
logger.error("数据库连接失败")
for line in logger.read_recent():
print(line)
# [2026-03-18 14:30:00] [INFO] 应用启动
# [2026-03-18 14:30:00] [INFO] 用户 Alice 登录
# [2026-03-18 14:30:00] [ERROR] 数据库连接失败
import shutil
shutil.rmtree("logs")
示例 2:文件批量处理工具
python
from pathlib import Path
import shutil
def batch_rename(directory, pattern, old_ext, new_ext, dry_run=True):
    """Bulk-change file extensions under *directory*.

    Args:
        directory: directory to search (non-recursive glob).
        pattern: filename stem pattern, e.g. "*" or "photo_*".
        old_ext: extension to match (with leading dot).
        new_ext: extension to rename to (with leading dot).
        dry_run: when True only preview the renames; perform them otherwise.
    """
    matches = list(Path(directory).glob(f"{pattern}{old_ext}"))
    if not matches:
        print("没有找到匹配的文件")
        return
    for src in matches:
        dst = src.with_suffix(new_ext)
        if dry_run:
            print(f" [预览] {src.name} → {dst.name}")
            continue
        src.rename(dst)
        print(f" [完成] {src.name} → {dst.name}")
    print(f"共 {len(matches)} 个文件{'(预览模式)' if dry_run else ''}")
# 准备测试
test_dir = Path("rename_test")
test_dir.mkdir(exist_ok=True)
for i in range(5):
(test_dir / f"photo_{i}.jpeg").write_text("")
# 预览
batch_rename("rename_test", "*", ".jpeg", ".jpg", dry_run=True)
# [预览] photo_0.jpeg → photo_0.jpg
# [预览] photo_1.jpeg → photo_1.jpg
# ...
# 共 5 个文件(预览模式)
# 执行
batch_rename("rename_test", "*", ".jpeg", ".jpg", dry_run=False)
# [完成] photo_0.jpeg → photo_0.jpg
# ...
# 验证
print([f.name for f in Path("rename_test").iterdir()])
# ['photo_0.jpg', 'photo_1.jpg', 'photo_2.jpg', 'photo_3.jpg', 'photo_4.jpg']
shutil.rmtree("rename_test")
示例 3:统计代码行数
python
from pathlib import Path
def count_lines(directory, extensions=None):
    """Count source-code lines per extension under *directory* (recursive).

    Args:
        directory: root directory to scan.
        extensions: iterable of extensions (with leading dot) to include;
            defaults to {".py", ".js", ".ts", ".go", ".java"}.

    Returns:
        A tuple ``(stats, total_files, total_lines)`` where ``stats`` maps
        extension -> {"files": n, "lines": m}.

    Note:
        Files that cannot be read (bad encoding, no permission) are excluded
        from BOTH the file and line counts. The original version skipped them
        only when summing lines but still counted them as files, so its
        per-extension stats were internally inconsistent.
    """
    if extensions is None:
        extensions = {".py", ".js", ".ts", ".go", ".java"}
    stats = {}
    total_files = 0
    total_lines = 0
    root = Path(directory)
    for ext in extensions:
        ext_files = 0
        ext_lines = 0
        for path in root.rglob(f"*{ext}"):
            try:
                line_count = len(path.read_text(encoding="utf-8").splitlines())
            except (UnicodeDecodeError, OSError):
                # Unreadable file: skip it entirely so counts stay consistent.
                continue
            ext_files += 1
            ext_lines += line_count
        if ext_files:
            stats[ext] = {"files": ext_files, "lines": ext_lines}
            total_files += ext_files
            total_lines += ext_lines
    return stats, total_files, total_lines
# 测试(统计当前目录)
# stats, files, lines = count_lines(".")
# print(f"共 {files} 个文件,{lines} 行代码")
# for ext, info in sorted(stats.items(), key=lambda x: -x[1]["lines"]):
# print(f" {ext}: {info['files']} 个文件,{info['lines']} 行")
15. 总结
读写操作速查
| 操作 | 代码 | 说明 |
|---|---|---|
| 读全部 | f.read() | 返回字符串 |
| 读一行 | f.readline() | 含 \n |
| 读所有行 | f.readlines() | 返回列表 |
| 逐行遍历 | for line in f: | 内存友好 |
| 写字符串 | f.write(s) | 返回字符数 |
| 写多行 | f.writelines(lst) | 不自动加 \n |
| print 写入 | print(x, file=f) | 方便格式化 |
常用模块对照
| 任务 | 推荐模块 |
|---|---|
| 路径操作 | pathlib(推荐) > os.path |
| 文件复制/移动/删除 | shutil |
| JSON | json |
| CSV | csv |
| INI 配置 | configparser |
| TOML 配置 | tomllib(Python 3.11+) |
| 序列化 Python 对象 | pickle(仅限可信数据) |
| 二进制结构 | struct |
| 临时文件 | tempfile |
| 编码检测 | chardet(第三方) |
最佳实践
python
# 1. 始终使用 with 语句操作文件
with open("file.txt", "r", encoding="utf-8") as f:
content = f.read()
# 2. 始终指定 encoding 参数(避免平台差异)
# Windows 默认 GBK,Linux/macOS 默认 UTF-8
# 3. 优先使用 pathlib 而非 os.path
from pathlib import Path
p = Path("data") / "output.json" # 比 os.path.join 更直观
# 4. 大文件逐行或分块处理
for line in open("big.txt", encoding="utf-8"):
process(line)
# 5. CSV 写入时加 newline=""(Windows 下防止空行)
with open("data.csv", "w", newline="", encoding="utf-8") as f:
writer = csv.writer(f)
# 6. 二进制文件用 "rb"/"wb",不要指定 encoding
with open("image.png", "rb") as f:
data = f.read()