4 changed files with 641362 additions and 0 deletions
@ -0,0 +1,172 @@ |
|||
import pandas as pd |
|||
import json |
|||
import numpy as np |
|||
import re |
|||
|
|||
# === Configuration ===
# Absolute path of the source workbook. NOTE(review): this is a
# machine-specific Windows path; adjust it before running elsewhere.
EXCEL_PATH = r"C:\Users\hanyuqing\Desktop\最新国家医保ICD编码\最新国家医保ICD编码\ICD-10医保版数据.xlsx"
# Output file for the generated tree; a relative path, so it is resolved
# against the current working directory.
OUTPUT_JSON_PATH = "icd10_tree_preserve_order.json"
|||
|
|||
# === 章 code 格式化函数(同前)=== |
|||
# Compiled once: "第<number>章" where <number> is Chinese numerals or digits.
_CHAPTER_RE = re.compile(r'第([一二三四五六七八九十百\d]+)章')
_CN_DIGITS = {
    '一': 1, '二': 2, '三': 3, '四': 4, '五': 5,
    '六': 6, '七': 7, '八': 8, '九': 9,
}
_CN_UNITS = {'十': 10, '百': 100}


def _parse_chinese_numeral(numeral):
    """Convert a Chinese numeral made of 一..九, 十 and 百 to an int, or None."""
    total = 0
    pending = 0       # digit read but not yet multiplied by a unit
    last_unit = None  # units must strictly decrease (百 before 十)
    for ch in numeral:
        if ch in _CN_DIGITS:
            if pending:
                # Two digits in a row (e.g. "一二") is not a valid numeral.
                return None
            pending = _CN_DIGITS[ch]
        elif ch in _CN_UNITS:
            unit = _CN_UNITS[ch]
            if last_unit is not None and unit >= last_unit:
                return None
            # A bare unit means "one of it": 十 == 10, 十二 == 12.
            total += (pending or 1) * unit
            pending = 0
            last_unit = unit
        else:
            # Mixed content such as "十1" is rejected.
            return None
    return total + pending


def extract_chapter_number(text):
    """Extract the chapter number from a chapter cell.

    Recognised forms, tried in order:
      * "第N章" anywhere in the text, where N is either decimal digits or a
        Chinese numeral built from 一..九, 十 and 百 (e.g. 第二十二章 -> 22);
      * a string consisting only of decimal digits ("12" -> 12);
      * decimal digits followed by a single trailing dot ("7." -> 7).

    Args:
        text: Raw cell value; it is converted with ``str()`` and stripped.

    Returns:
        The chapter number as an int, or None when ``text`` is falsy or no
        number can be recognised.
    """
    if not text:
        return None
    text = str(text).strip()

    match = _CHAPTER_RE.search(text)
    if match:
        num_str = match.group(1)
        if num_str.isdecimal():
            return int(num_str)
        parsed = _parse_chinese_numeral(num_str)
        if parsed is not None:
            return parsed

    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as "²" that int() cannot convert, which would raise ValueError.
    if text.isdecimal():
        return int(text)
    if text.endswith('.') and text[:-1].isdecimal():
        return int(text[:-1])
    return None
|||
|
|||
def format_chapter_code(original):
    """Normalise a chapter cell to the form "第N章".

    When a chapter number can be extracted, the result is "第{N}章" with N
    as an Arabic number. Otherwise the stripped string form of ``original``
    is returned, or "" when ``original`` is falsy.
    """
    chapter_no = extract_chapter_number(original)
    if chapter_no is None:
        if not original:
            return ""
        return str(original).strip()
    return f"第{chapter_no}章"
|||
|
|||
# === 工具函数 === |
|||
def get_clean(val):
    """Return the stripped string form of ``val``, or None for blank values.

    A value counts as blank when it is None or when its stripped,
    lower-cased string form is "", "nan" or "none".
    """
    if val is None:
        return None
    stripped = str(val).strip()
    if stripped.lower() in ("", "nan", "none"):
        return None
    return stripped
|||
|
|||
def make_fields(code, name):
    """Build the (code, label, title) triple for a tree node.

    Falsy ``code`` / ``name`` become "". The title is "code label" when
    both are present, otherwise whichever of the two is non-empty.
    """
    code_str = code or ""
    label_str = name or ""
    if not code_str:
        return code_str, label_str, label_str
    if not label_str:
        return code_str, label_str, code_str
    return code_str, label_str, f"{code_str} {label_str}"
|||
|
|||
# === 构建有序树:每个节点的 children 是 list === |
|||
class TreeNode:
    """Tree node whose children keep their insertion order.

    Attributes are plain data: ``code``, ``label`` and ``title`` strings and
    a ``children`` list of TreeNode objects in the order they were added.
    """

    def __init__(self, code="", label="", title=""):
        self.code = code
        self.label = label
        self.title = title
        self.children = []  # a list, so insertion order is preserved
        # (code, label) -> child node: O(1) de-duplication and lookup.
        self._child_by_key = {}

    def add_or_get_child(self, code, label, title):
        """Return the child identified by (code, label), creating it if needed.

        Args:
            code: Child code; part of the identity key.
            label: Child label; part of the identity key.
            title: Title used only when a new child is created. The title of
                an already existing child is left unchanged.

        Returns:
            The existing or newly appended child TreeNode.
        """
        key = (code, label)
        child = self._child_by_key.get(key)
        if child is None:
            child = TreeNode(code=code, label=label, title=title)
            self.children.append(child)
            self._child_by_key[key] = child
        return child
|||
|
|||
# === Build the root node ===
root = TreeNode()
root.children = []  # chapter nodes hang directly off this synthetic root

chapter_map = {}  # (code, label) -> TreeNode, so each chapter is created once

# === Read the Excel workbook ===
# header=1 takes the column names from the second sheet row (0-indexed);
# dtype=str keeps every cell as text.
df = pd.read_excel(
    EXCEL_PATH,
    header=1,
    dtype=str,
    engine='openpyxl'
)
df.columns = df.columns.astype(str).str.strip()
df = df.replace({np.nan: None})

# (code column, name column) for each level below the chapter, outermost
# first: section, category, subcategory, diagnosis entry.
_LEVEL_COLUMNS = (
    ("节代码范围", "节名称"),
    ("类目代码", "类目名称"),
    ("亚目代码", "亚目名称"),
    ("条目(诊断)代码", "条目(诊断)名称"),
)

for _, row in df.iterrows():
    raw_chapter = get_clean(row.get("章"))
    chapter_name = get_clean(row.get("章的名称"))

    # Rows without any chapter information are skipped entirely.
    if not raw_chapter and not chapter_name:
        continue

    # === Chapter ===
    chapter_code = format_chapter_code(raw_chapter)
    chap_label = chapter_name or chapter_code
    if chapter_code and chapter_name:
        chap_title = f"{chapter_code} {chapter_name}"
    else:
        # Only one of the two is present: use it alone, so the title does
        # not start with a stray space when the chapter code is missing.
        chap_title = chapter_name or chapter_code
    chap_key = (chapter_code, chap_label)

    chapter_node = chapter_map.get(chap_key)
    if chapter_node is None:
        chapter_node = TreeNode(code=chapter_code, label=chap_label, title=chap_title)
        root.children.append(chapter_node)
        chapter_map[chap_key] = chapter_node

    # === Section -> category -> subcategory -> diagnosis entry ===
    # A level whose code and name are both blank is skipped, and the next
    # level attaches to the deepest node created so far.
    current = chapter_node
    for code_col, name_col in _LEVEL_COLUMNS:
        level_code = get_clean(row.get(code_col))
        level_name = get_clean(row.get(name_col))
        if level_code or level_name:
            node_code, node_label, node_title = make_fields(level_code, level_name)
            current = current.add_or_get_child(node_code, node_label, node_title)
|||
|
|||
# === 转为带 id 的标准格式(DFS 顺序遍历)=== |
|||
# Module-level counter; ids are handed out in depth-first pre-order.
next_id = 1


def node_to_dict(node):
    """Convert ``node`` and its subtree into nested dicts with sequential ids.

    Each dict has the keys "id", "code", "label" and "title"; a "children"
    list is added only when the node has children. A node receives its id
    before any of its descendants, and the global ``next_id`` counter is
    advanced by one per node visited.
    """
    global next_id

    node_id = next_id
    next_id = node_id + 1

    item = {"id": node_id}
    item["code"] = node.code
    item["label"] = node.label
    item["title"] = node.title

    if node.children:
        converted = []
        for child in node.children:
            converted.append(node_to_dict(child))
        item["children"] = converted

    return item
|||
|
|||
# Convert every chapter subtree, in order; the synthetic root is not emitted.
treeData = []
for chap in root.children:
    treeData.append(node_to_dict(chap))

# === Save JSON ===
# ensure_ascii=False keeps the Chinese text readable in the output file.
with open(OUTPUT_JSON_PATH, 'w', encoding='utf-8') as out_file:
    json.dump(treeData, out_file, ensure_ascii=False, indent=2)

print(f"✅ 树形结构已生成,共 {len(treeData)} 个章节点")
print(f"📄 输出文件: {OUTPUT_JSON_PATH}")
|||
Binary file not shown.
File diff suppressed because it is too large
File diff suppressed because it is too large
Loading…
Reference in new issue