排版盘古化

排版盘古化是指将中文排版中的中英文、数字与标点符号之间的空格调整得更加合理，以提高文本的可读性。

如果你跟我一樣，每次看到網頁上的中文字和英文、數字、符號擠在一塊，就會坐立難安，忍不住想在它們之間加個空格。這個外掛（支援 Chrome 和 Firefox）正是你在網路世界走跳所需要的東西，它會自動替你在網頁中所有的中文字和半形的英文、數字、符號之間插入空白。

漢學家稱這個空白字元為「盤古之白」，因為它劈開了全形字和半形字之間的混沌。另有研究顯示，打字的時候不喜歡在中文和英文之間加空格的人，感情路都走得很辛苦，有七成的比例會在 34 歲的時候跟自己不愛的人結婚，而其餘三成的人最後只能把遺產留給自己的貓。畢竟愛情跟書寫都需要適時地留白。

與大家共勉之。 ———— https://github.com/vinta/pangu.js

以下这段代码可以遍历文件夹下的所有 md 和 mdx 文件，并对其进行排版盘古化。

import os
import re
from pangu import spacing_text # pip install pangu

def process_file(file_path):
    """Apply pangu spacing to one Markdown file in place.

    The YAML front matter (a leading ``---`` ... ``---`` block) is written
    back unchanged.  In the body, fenced code blocks, table-of-contents
    links (``- [text](#anchor)``) and ``**bold**`` spans are swapped for
    placeholders before ``spacing_text`` runs and swapped back afterwards,
    so the spacing pass never sees their content.

    The file is only rewritten when the result differs from what was read.

    Args:
        file_path: Path of the UTF-8 encoded file to process.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Split the YAML front matter from the body.
    match = re.match(r'^(---\n.*?\n---\n)(.*)', content, re.DOTALL)
    if match:
        front_matter, body = match.groups()
    else:
        front_matter, body = '', content

    # Spans that must not be re-spaced, in the order they are stashed.
    protected_patterns = (
        ('CODE_BLOCK', re.compile(r'```.*?```', re.DOTALL)),
        ('TOC_ITEM', re.compile(r'- \[.*?\]\(#.*?\)')),
        ('BOLD_TEXT', re.compile(r'\*\*.*?\*\*')),
    )

    stashed = []
    for label, pattern in protected_patterns:
        spans = pattern.findall(body)
        for i, span in enumerate(spans):
            body = body.replace(span, f'{{{label}_{i}}}')
        stashed.append((label, spans))

    # Run pangu on everything that is left.
    body = spacing_text(body)

    # Restore in reverse stash order: a span stashed later (e.g. bold text)
    # may itself contain a placeholder created earlier (e.g. a code block),
    # which only becomes visible again once the outer span is back in place.
    for label, spans in reversed(stashed):
        for i, span in enumerate(spans):
            body = body.replace(f'{{{label}_{i}}}', span)

    new_content = front_matter + body

    # Avoid touching files that are already correctly spaced.
    if new_content != content:
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(new_content)

def process_directory(directory):
    """Recursively run ``process_file`` on every ``.md``/``.mdx`` file.

    Args:
        directory: Root directory to walk.
    """
    markdown_suffixes = ('.md', '.mdx')
    for folder, _subdirs, names in os.walk(directory):
        for name in names:
            # Skip anything that is not a Markdown source file.
            if not name.endswith(markdown_suffixes):
                continue
            process_file(os.path.join(folder, name))

# Directory whose Markdown files are processed; 'posts' is a relative path,
# so it is resolved against the current working directory when the script runs.
directory_to_process = 'posts'
process_directory (directory_to_process)

这段代码没有对 Python 的注释进行盘古化处理，因为 Python 的注释以 # 开头，这和 Markdown 的标题语法有冲突。

对于 YAML Front Matter，可以用下面这段代码只对其中的 title 字段进行盘古化处理：

import os
import re
from pangu import spacing

def process_file(file_path):
    """Apply pangu spacing to the ``title`` value in a file's YAML front matter.

    Only the first line of the front matter whose key is exactly ``title``
    is changed; the rest of the front matter and the whole body are written
    back unchanged.  Files without front matter, without a ``title`` line,
    or whose title is already correctly spaced are not rewritten.

    Args:
        file_path: Path of the UTF-8 encoded file to process.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Front matter is the leading block from a `---` line to the next `---` line.
    match = re.match(r'(---\n.*?\n---\n)(.*)', content, re.DOTALL)
    if not match:
        return
    front_matter, body = match.groups()

    # Anchor to the start of a line so keys such as `subtitle:` do not match,
    # and use [ \t]* rather than \s* so an empty title cannot swallow the
    # following line.
    title_line = re.compile(r'^([ \t]*title:[ \t]*)(.*)$', re.MULTILINE)

    # Substitute on the matched line only; a plain str.replace of the title
    # text would also rewrite other fields that happen to contain it.
    new_front_matter, replaced = title_line.subn(
        lambda m: m.group(1) + spacing(m.group(2)),
        front_matter,
        count=1,
    )
    if not replaced:
        return

    # Rebuild from the matched parts instead of content.replace(), which
    # could also hit a copy of the front matter text inside the body.
    new_content = new_front_matter + body

    # Write back only when the title actually changed.
    if new_content != content:
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(new_content)

def process_directory(directory):
    """Walk ``directory`` and hand each ``.md``/``.mdx`` file to ``process_file``.

    Args:
        directory: Root directory to walk recursively.
    """
    for current_dir, _dirs, filenames in os.walk(directory):
        # Keep the original listing order while filtering for Markdown files.
        targets = [name for name in filenames if name.endswith(('.md', '.mdx'))]
        for name in targets:
            process_file(os.path.join(current_dir, name))

# Folder whose Markdown files get their front-matter title processed;
# 'posts' is a relative path, resolved against the current working directory.
directory_path = 'posts'
process_directory(directory_path)

对于 VS Code 用户，也可以安装 Pangu 插件。转到扩展（Extensions）视图，搜索 "Pangu" 并安装。用快捷键 Ctrl+Shift+P 打开命令面板，输入 "Pangu" 并选择 "Pangu: Add whitespace for all" 来自动修正当前文件中的中英文间距。

VS Code 的扩展中有很多 Pangu 插件，其中 Pangu-Markdown 很不错。