"""Convert an ICD-10 code spreadsheet into an ordered, id-annotated JSON tree.

Each spreadsheet row contributes a path chapter -> section -> category ->
subcategory -> diagnosis.  Nodes are de-duplicated per parent by their
``(code, label)`` pair and children keep first-seen order.  Running the file
as a script reads ``EXCEL_PATH`` and writes ``OUTPUT_JSON_PATH``; importing it
only defines the helpers.
"""
import itertools
import json
import re

import numpy as np
import pandas as pd

# === Configuration ===
EXCEL_PATH = r"C:\Users\hanyuqing\Desktop\最新国家医保ICD编码\最新国家医保ICD编码\ICD-10医保版数据.xlsx"
OUTPUT_JSON_PATH = "icd10_tree_preserve_order.json"

# === Chapter code formatting ===
# Compiled once; the capture group is either Arabic digits or a Chinese numeral.
_CHAPTER_PATTERN = re.compile(r'第([一二三四五六七八九十百\d]+)章')

_CHINESE_DIGITS = {
    '一': 1, '二': 2, '三': 3, '四': 4, '五': 5,
    '六': 6, '七': 7, '八': 8, '九': 9,
}

# (code column, name column) for every level below the chapter, top-down.
_LEVEL_COLUMNS = (
    ("节代码范围", "节名称"),
    ("类目代码", "类目名称"),
    ("亚目代码", "亚目名称"),
    ("条目(诊断)代码", "条目(诊断)名称"),
)


def _chinese_numeral_to_int(numeral):
    """Convert a Chinese numeral between 一 (1) and 九十九 (99) to an int.

    Returns None for anything else (including numerals containing 百), so
    callers can fall back to other parsing strategies.
    """
    if numeral in _CHINESE_DIGITS:
        return _CHINESE_DIGITS[numeral]
    tens, separator, ones = numeral.partition('十')
    if not separator:
        return None
    if tens and tens not in _CHINESE_DIGITS:
        return None
    if ones and ones not in _CHINESE_DIGITS:
        return None
    # A bare leading 十 means "one ten" (十二 == 12).
    return _CHINESE_DIGITS.get(tens, 1) * 10 + _CHINESE_DIGITS.get(ones, 0)


def extract_chapter_number(text):
    """Return the chapter number found in *text*, or None if there is none.

    Recognised forms: ``第N章`` where N is Arabic digits or a Chinese numeral
    up to 九十九, a bare integer string, or an integer followed by ``.``.
    """
    if not text:
        return None
    text = str(text).strip()
    match = _CHAPTER_PATTERN.search(text)
    if match:
        numeral = match.group(1)
        # isdecimal (not isdigit): int() rejects digit-like characters
        # such as superscripts, which isdigit() would let through.
        if numeral.isdecimal():
            return int(numeral)
        number = _chinese_numeral_to_int(numeral)
        if number is not None:
            return number
    if text.isdecimal():
        return int(text)
    if text.endswith('.') and text[:-1].isdecimal():
        return int(text[:-1])
    return None


def format_chapter_code(original):
    """Normalise a chapter cell to ``第<number>章``.

    When no chapter number can be extracted, the stripped original text is
    returned (or an empty string for a falsy input).
    """
    num = extract_chapter_number(original)
    if num is not None:
        return f"第{num}章"
    return str(original).strip() if original else ""


# === Utility functions ===
def get_clean(val):
    """Return *val* as a stripped string, or None for missing values.

    None, empty/whitespace-only text and the literal strings "nan"/"none"
    (case-insensitive) all count as missing.
    """
    if val is None or str(val).strip().lower() in ("", "nan", "none"):
        return None
    return str(val).strip()


def make_fields(code, name):
    """Build the ``(code, label, title)`` triple for a node.

    Missing parts become empty strings; the title is ``"code name"`` when both
    are present, otherwise whichever one exists.
    """
    code_str = code or ""
    label_str = name or ""
    if code_str and label_str:
        title_str = f"{code_str} {label_str}"
    else:
        title_str = code_str or label_str
    return code_str, label_str, title_str


# === Ordered tree: every node keeps its children in a list ===
class TreeNode:
    """Tree node whose children are unique per ``(code, label)`` and ordered."""

    def __init__(self, code="", label="", title=""):
        self.code = code
        self.label = label
        self.title = title
        self.children = []  # list keeps first-seen order
        self._children_by_key = {}  # (code, label) -> child, for O(1) dedupe

    def add_or_get_child(self, code, label, title):
        """Return the child keyed by ``(code, label)``, creating it if needed.

        *title* is only used when a new child is created; an existing child
        keeps the title it was first created with.
        """
        key = (code, label)
        child = self._children_by_key.get(key)
        if child is None:
            child = TreeNode(code=code, label=label, title=title)
            self.children.append(child)
            self._children_by_key[key] = child
        return child


def build_tree(rows):
    """Build the tree from *rows* and return its (unlabelled) root node.

    *rows* is any iterable of objects with a mapping-style ``.get(column)``
    (dicts or pandas row Series).  Rows with neither a chapter nor a chapter
    name are skipped; a level whose code and name are both missing is skipped
    and the next level attaches to the nearest present ancestor.
    """
    root = TreeNode()
    for row in rows:
        raw_chapter = get_clean(row.get("章"))
        chapter_name = get_clean(row.get("章的名称"))
        if not raw_chapter and not chapter_name:
            continue

        chapter_code = format_chapter_code(raw_chapter)
        chapter_label = chapter_name or chapter_code
        # make_fields avoids a leading space in the title when the code is
        # empty, matching how every other level builds its title.
        _, _, chapter_title = make_fields(chapter_code, chapter_name)
        current = root.add_or_get_child(chapter_code, chapter_label, chapter_title)

        for code_column, name_column in _LEVEL_COLUMNS:
            code = get_clean(row.get(code_column))
            name = get_clean(row.get(name_column))
            if code or name:
                current = current.add_or_get_child(*make_fields(code, name))
    return root


# === Convert to the id-annotated output format (depth-first order) ===
def node_to_dict(node, id_counter=None):
    """Serialise *node* and its descendants to nested dicts.

    Ids are drawn from *id_counter* in depth-first pre-order (a node gets its
    id before its children).  Pass one shared iterator to number several
    subtrees consecutively; by default numbering starts at 1.  The
    ``children`` key is present only for nodes that have children.
    """
    if id_counter is None:
        id_counter = itertools.count(1)
    item = {
        "id": next(id_counter),
        "code": node.code,
        "label": node.label,
        "title": node.title,
    }
    if node.children:
        item["children"] = [node_to_dict(child, id_counter) for child in node.children]
    return item


def main(excel_path=EXCEL_PATH, output_json_path=OUTPUT_JSON_PATH):
    """Read the Excel sheet, build the tree and write it out as JSON."""
    # === Read Excel ===
    # header=1: column names are taken from the sheet's second row.
    df = pd.read_excel(
        excel_path,
        header=1,
        dtype=str,
        engine='openpyxl'
    )
    df.columns = df.columns.astype(str).str.strip()
    df = df.replace({np.nan: None})

    root = build_tree(row for _, row in df.iterrows())

    # One shared counter so ids run consecutively across all chapters.
    id_counter = itertools.count(1)
    tree_data = [node_to_dict(chapter, id_counter) for chapter in root.children]

    # === Save JSON ===
    with open(output_json_path, 'w', encoding='utf-8') as f:
        json.dump(tree_data, f, ensure_ascii=False, indent=2)

    print(f"✅ 树形结构已生成,共 {len(tree_data)} 个章节点")
    print(f"📄 输出文件: {output_json_path}")


if __name__ == "__main__":
    main()