# KGPython/python/icd_parse_name.py


								import pandas as pd

								import json

								import numpy as np

								import re


								# === Configuration ===

								# Input workbook handed to pd.read_excel further down.
								# NOTE(review): absolute, machine-specific Windows path.
								EXCEL_PATH = r"C:\Users\hanyuqing\Desktop\最新国家医保ICD编码\最新国家医保ICD编码\ICD-10医保版数据.xlsx"

								# File the generated tree is written to as JSON (relative path).
								OUTPUT_JSON_PATH = "icd10_tree_preserve_order.json"


								# === Chapter code normalisation ===

								# Finds a heading such as "第三章" / "第3章" and captures the numeral part.
								_CHAPTER_PATTERN = re.compile(r'第([一二三四五六七八九十百\d]+)章')

								_CHINESE_DIGITS = {
								    '一': 1, '二': 2, '三': 3, '四': 4, '五': 5,
								    '六': 6, '七': 7, '八': 8, '九': 9,
								}
								_CHINESE_UNITS = {'十': 10, '百': 100}


								def _chinese_numeral_to_int(numeral):
								    """Convert a Chinese numeral built from 一..九, 十 and 百 to an int.

								    Covers 1-999 as far as it can be written without 零 (for example
								    十, 二十三, 一百二十). Returns None for anything malformed or
								    ambiguous (for example 十十, 二二, 一百五, mixed scripts).
								    """
								    total = 0
								    pending = None    # digit still waiting for its unit
								    last_unit = None  # units must strictly decrease: 百 before 十
								    for ch in numeral:
								        if ch in _CHINESE_DIGITS:
								            if pending is not None:
								                return None  # two digits in a row
								            pending = _CHINESE_DIGITS[ch]
								        elif ch in _CHINESE_UNITS:
								            unit = _CHINESE_UNITS[ch]
								            if last_unit is not None and unit >= last_unit:
								                return None
								            if pending is None:
								                # Only a leading 十 may omit its digit (十 = 10, 十一 = 11).
								                if unit != 10 or total:
								                    return None
								                pending = 1
								            total += pending * unit
								            pending = None
								            last_unit = unit
								        else:
								            return None
								    if pending is not None:
								        # A ones digit directly after 百 would need 零 (一百零五); reject it.
								        if last_unit == 100:
								            return None
								        total += pending
								    return total or None


								def extract_chapter_number(text):
								    """Extract the chapter number from a raw chapter cell.

								    Recognised forms, after stripping surrounding whitespace:
								      * a heading containing ``第<numeral>章`` where the numeral is
								        either decimal digits or a Chinese numeral;
								      * a bare decimal number such as ``"12"``;
								      * a decimal number followed by a single dot such as ``"12."``.

								    Args:
								        text: Cell value; any falsy value yields None, everything else
								            is converted with ``str()``.

								    Returns:
								        The chapter number as an int, or None when none can be parsed.
								    """
								    if not text:
								        return None
								    text = str(text).strip()

								    match = _CHAPTER_PATTERN.search(text)
								    if match:
								        num_str = match.group(1)
								        if num_str.isdecimal():
								            return int(num_str)
								        value = _chinese_numeral_to_int(num_str)
								        if value is not None:
								            return value

								    # isdecimal() rather than isdigit(): the latter accepts characters
								    # such as superscript digits that int() cannot convert.
								    if text.isdecimal():
								        return int(text)
								    if text.endswith('.') and text[:-1].isdecimal():
								        return int(text[:-1])
								    return None


								def format_chapter_code(original):
								    """Return the chapter cell in the canonical form ``第N章``.

								    When no chapter number can be extracted, the stripped string form
								    of ``original`` is returned instead, or an empty string if
								    ``original`` is falsy.
								    """
								    chapter_no = extract_chapter_number(original)
								    if chapter_no is None:
								        if original:
								            return str(original).strip()
								        return ""
								    return f"第{chapter_no}章"


								# === Helper functions ===
								def get_clean(val):
								    """Return ``val`` as a stripped string, or None when it is empty.

								    None, and any value whose stripped, lower-cased string form is
								    "", "nan" or "none", count as empty.
								    """
								    if val is None:
								        return None
								    stripped = str(val).strip()
								    if stripped.lower() in ("", "nan", "none"):
								        return None
								    return stripped


								def make_fields(code, name):
								    """Build the ``(code, label, title)`` triple for a tree node.

								    Falsy inputs become empty strings. The title is "code label" when
								    both parts are present, otherwise whichever part is non-empty.
								    """
								    code_str = code if code else ""
								    label_str = name if name else ""
								    if code_str and label_str:
								        title_str = f"{code_str} {label_str}"
								    else:
								        title_str = code_str or label_str
								    return code_str, label_str, title_str


								# === Ordered tree: every node keeps its children in a list ===
								class TreeNode:
								    """A node of the code tree with insertion-ordered, de-duplicated children.

								    Attributes:
								        code: Code of this node (may be an empty string).
								        label: Display name of this node.
								        title: Combined text stored alongside code and label.
								        children: Child nodes in the order they were first added.
								    """

								    def __init__(self, code="", label="", title=""):
								        self.code = code
								        self.label = label
								        self.title = title
								        self.children = []  # a list, so insertion order is preserved
								        # (code, label) -> child node; gives O(1) lookup of existing
								        # children instead of scanning ``children`` on every hit.
								        self._child_index = {}

								    def add_or_get_child(self, code, label, title):
								        """Return the child identified by ``(code, label)``, creating it if needed.

								        ``title`` is only used when a new child is created; an existing
								        child keeps the title it was created with. Only children added
								        through this method are tracked; nodes appended to ``children``
								        directly are not de-duplicated.

								        Returns:
								            The existing or newly created child TreeNode (never None).
								        """
								        key = (code, label)
								        child = self._child_index.get(key)
								        if child is None:
								            child = TreeNode(code=code, label=label, title=title)
								            self.children.append(child)
								            self._child_index[key] = child
								        return child


								# === Root node ===
								root = TreeNode()
								root.children = []  # chapter nodes are attached here

								# (chapter code, chapter label) -> TreeNode, so each chapter is created once
								chapter_map = {}

								# === Load the spreadsheet ===
								# header=1: column names are taken from the second row of the sheet.
								df = pd.read_excel(EXCEL_PATH, header=1, dtype=str, engine='openpyxl')
								df.columns = df.columns.astype(str).str.strip()
								df = df.replace({np.nan: None})

								# (code column, name column) for each level below the chapter, outermost first.
								_LEVEL_COLUMNS = (
								    ("节代码范围", "节名称"),              # section
								    ("类目代码", "类目名称"),              # category
								    ("亚目代码", "亚目名称"),              # subcategory
								    ("条目（诊断）代码", "条目（诊断）名称"),  # diagnosis entry
								)

								for _, row in df.iterrows():
								    raw_chapter = get_clean(row.get("章"))
								    chapter_name = get_clean(row.get("章的名称"))

								    # Rows without any chapter information are skipped entirely.
								    if not (raw_chapter or chapter_name):
								        continue

								    # --- chapter ---
								    chapter_code = format_chapter_code(raw_chapter)
								    if chapter_name:
								        chap_label = chapter_name
								        chap_title = f"{chapter_code} {chapter_name}"
								    else:
								        chap_label = chapter_code
								        chap_title = chapter_code
								    chap_key = (chapter_code, chap_label)

								    chapter_node = chapter_map.get(chap_key)
								    if chapter_node is None:
								        chapter_node = TreeNode(code=chapter_code, label=chap_label, title=chap_title)
								        root.children.append(chapter_node)
								        chapter_map[chap_key] = chapter_node

								    # --- section / category / subcategory / diagnosis ---
								    # Walk down one level at a time; a level with neither code nor name
								    # is skipped and the next level hangs off the current node.
								    current = chapter_node
								    for code_col, name_col in _LEVEL_COLUMNS:
								        level_code = get_clean(row.get(code_col))
								        level_name = get_clean(row.get(name_col))
								        if not (level_code or level_name):
								            continue
								        node_code, node_label, node_title = make_fields(level_code, level_name)
								        current = current.add_or_get_child(node_code, node_label, node_title)


								# === Convert to the output format with ids (depth-first, pre-order) ===
								next_id = 1


								def node_to_dict(node):
								    """Convert ``node`` and its descendants into nested dicts.

								    Each dict gets the next value of the module-level ``next_id``
								    counter; a parent is numbered before its children. The
								    "children" key is present only when the node has children.
								    """
								    global next_id
								    assigned_id = next_id
								    next_id = assigned_id + 1

								    item = {
								        "id": assigned_id,
								        "code": node.code,
								        "label": node.label,
								        "title": node.title,
								    }

								    if node.children:
								        converted = []
								        for child in node.children:
								            converted.append(node_to_dict(child))
								        item["children"] = converted

								    return item


								# One dict per chapter, in the order the chapters were first seen.
								treeData = list(map(node_to_dict, root.children))

								# === Write the JSON file ===
								# ensure_ascii=False keeps the Chinese text readable in the output.
								serialized = json.dumps(treeData, ensure_ascii=False, indent=2)
								with open(OUTPUT_JSON_PATH, 'w', encoding='utf-8') as f:
								    f.write(serialized)

								print(f"✅ 树形结构已生成，共 {len(treeData)} 个章节点")
								print(f"📄 输出文件: {OUTPUT_JSON_PATH}")