不会吧，不会吧，都已经2024年了，还有人在用os.path.join？不知道pathlib.Path有多香吗？

waketzheng

已于 2025-01-04 09:35:46 修改

阅读量446

点赞数 5

CC 4.0 BY-SA版权

文章标签： python pathlib

于 2024-12-25 18:31:47 首次发布

本文链接：https://blog.csdn.net/jaket5219999/article/details/144722768

兵马未动，粮草先行：Life is short, enjoy yourself~

今天看到一段类似这样的代码：

import os
import shutil

work_dir = "E:\\代码\\hello world"  # root folder; the loop below moves .xml files with no "<"-prefixed line aside
data_dir = os.path.join(os.path.join(work_dir, "datasets"), "xmls")  # i.e. <work_dir>/datasets/xmls

for name in os.listdir(data_dir):  # listdir yields bare entry names, not full paths
    if not name.endswith(".xml"):  # only .xml entries are inspected
        continue
    file = os.path.join(data_dir, name)
    with open(file, "r", encoding="utf-8") as f:
        lines = f.readlines()
    if not any(i.strip().startswith("<") for i in lines):  # no line begins with "<"
        category = os.path.split(data_dir)[-1]  # last path component of data_dir ("xmls")
        folder = "broken_" + category
        out_dir = os.path.join(os.path.dirname(data_dir), folder)  # sibling directory of data_dir
        os.makedirs(out_dir, exist_ok=True)
        new_path = os.path.join(out_dir, name + ".txt")  # original name with ".txt" appended
        shutil.move(file, new_path)
        print(f"Move {file} to {new_path}")

忍不住吐槽了一下，并强力推荐使用从Python 3.4起加入标准库的pathlib。

上例中原本20行的代码，改用pathlib的话，只需12行：

from pathlib import Path

work_dir = "E:\\代码\\hello world"  # root folder; the loop below moves .xml files with no "<"-prefixed line aside
data_dir = Path(work_dir, "datasets", "xmls")  # Path() joins all segments in a single call

for p in data_dir.glob("*.xml"):  # yields Path objects (full paths) for entries matching *.xml
    lines = p.read_text(encoding="utf-8").splitlines()
    if not any(i.strip().startswith("<") for i in lines):  # no line begins with "<"
        out_dir = data_dir.parent / f"broken_{data_dir.name}"  # sibling directory of data_dir
        out_dir.mkdir(parents=True, exist_ok=True)
        new_path = p.rename(out_dir / f"{p.name}.txt")  # rename() returns the new Path on Python 3.8+
        print(f"Move {p} to {new_path}")

=========================================================================

os vs pathlib：

1. 获取当前目录及其上级目录

import os

current_dir = os.getcwd() # current working directory
assert isinstance(current_dir, str) # os.getcwd() hands back a plain string
parent_dir = os.path.dirname(current_dir) # parent directory
grandparent_dir = os.path.dirname(os.path.dirname(current_dir)) # grandparent directory (dirname applied twice)

# --------------------------------------------------------------
from pathlib import Path
current_dir = Path.cwd() # current working directory
assert isinstance(current_dir, Path) # Path.cwd() hands back a Path object, not a string
parent_dir = current_dir.parent # parent directory
grandparent_dir = current_dir.parent.parent # grandparent directory

2. 遍历某个路径下的某类文件

import os

target_dir = "D:\\Documents\\"
suffix = ".py"

# Single-level traversal: visit only the direct entries of target_dir
for name in os.listdir(target_dir):
    assert os.sep not in name # bare entry name only, no directory part
    if not name.endswith(suffix):
        continue
    file_path = os.path.join(target_dir, name) # rebuild the full path by hand
    assert os.path.exists(file_path)


# Recursive traversal: visit target_dir and every directory below it
for root, dirs, files in os.walk(target_dir):
    for name in files:
        if not name.endswith(suffix):
            continue
        file_path = os.path.join(root, name) # root is the directory currently being walked
        assert os.path.isfile(file_path)

# ------------------------------------------------
from pathlib import Path

# Single-level traversal: visit only the direct entries of target_dir
for file in Path(target_dir).glob(f'*{suffix}'):
    assert isinstance(file, Path) and os.sep in str(file) # includes the directory part
    name = file.name # file name
    assert isinstance(name, str) and os.sep not in name
    assert file.exists() # check that the path exists

# Recursive traversal: visit target_dir and every directory below it
for file in Path(target_dir).rglob(f'*{suffix}'):
    if not file.is_file(): # directories named like xxx.py are matched by the pattern too
        continue
    assert file.name.endswith(suffix)

3. 路径拼接

import os

target = "E:\\python scripts\\simple\\print_heart.py"
current_dir = "E:\\Downloads"

# Join segment by segment, or pass a pre-joined relative path as a single argument.
joined = os.path.join(os.path.dirname(current_dir), 'python scripts', 'simple', 'print_heart.py')
joined2 = os.path.join(os.path.dirname(current_dir), 'python scripts\\simple\\print_heart.py')
assert joined == joined2 == target # relies on Windows path semantics (drive letter, '\\' separator)

# ---------------------------------
from pathlib import Path

target = Path("E:\\python scripts\\simple\\print_heart.py")
current_dir = Path("E:\\Downloads")

# The "/" operator appends segments; a multi-segment string is accepted as one operand.
joined = current_dir.parent / 'python scripts' / 'simple' / 'print_heart.py'
joined2 = current_dir.parent / 'python scripts\\simple\\print_heart.py'
assert joined == joined2 == target # Path equality, not string equality; assumes a Windows Path flavour

# pathlib also supports joinpath() and the Path('a', 'b', 'c') constructor form
joined3 = current_dir.parent.joinpath('python scripts\\simple\\print_heart.py')
joined4 = current_dir.parent.joinpath('python scripts/simple/print_heart.py')
joined5 = current_dir.parent.joinpath('python scripts').joinpath('simple').joinpath('print_heart.py')

# The constructor accepts Path objects and strings, with either separator inside a segment.
joined6 = Path(current_dir.parent, 'python scripts', 'simple', 'print_heart.py')
joined7 = Path(str(current_dir.parent), 'python scripts', 'simple', 'print_heart.py')
joined8 = Path(current_dir.parent, 'python scripts/simple/print_heart.py')
joined9 = Path(current_dir.parent, 'python scripts\\simple\\print_heart.py')

# Every spelling above is expected to compare equal to the same target path.
for i in (joined3, joined4, joined5, joined6, joined7, joined8, joined9):
    assert i == target

一些比较特殊的情况：

4. pathlib.Path的一些常用属性和方法

# Common read-only attributes and queries of a Path object.
file = Path('E:/user/project/data/sample.txt') # on Windows both '\\' and '/' are accepted as separators
print(file.name)      # prints: sample.txt
print(file.suffix)    # prints: .txt
print(file.stem)      # prints: sample
print(file.parent)    # prints: E:\user\project\data (on Windows)
print(file.exists())  # whether the path exists
print(file.is_file()) # whether the path is a regular file
print(file.is_dir())  # whether the path is a directory
print(file.stat().st_size) # prints the file size in bytes, taken from file metadata instead of loading the whole file

# NOTE(review): this reads as a reference list of independent calls rather than a
# runnable sequence -- e.g. mkdir() on a path that touch() just created would raise FileExistsError.
file.read_bytes()     # read the contents as binary; returns bytes
file.read_text()      # read the contents as text; returns str
file.write_bytes(b'') # write binary data
file.write_text('xx') # write text
file.touch()          # create an empty file
file.mkdir()          # create a directory at this path
file.parent.glob('*') # iterate over the parent directory; comparable to os.listdir(os.path.dirname(file))
file.parent.rglob('*')# recursively iterate over files and folders in the parent directory and all of its subdirectories
file.unlink()         # delete the file
file.parent.rmdir()   # remove the parent directory (raises an error if it is not empty)
file.with_suffix('.py') # swap the suffix; returns Path('E:/user/project/data/sample.py')
file.with_name('sample.py') # swap the file name; returns Path('E:/user/project/data/sample.py')

打个广告：Ruff很好用，搭配mypy可以减少很多无脑错误，由于经常组合使用我还特意封装了一个便捷小工具fast-dev-cli～