You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
173 lines
5.5 KiB
173 lines
5.5 KiB
|
3 months ago
|
import pandas as pd
|
||
|
|
import json
|
||
|
|
import numpy as np
|
||
|
|
import re
|
||
|
|
|
||
|
|
# === Configuration ===
# Workbook that is read as the source of the ICD-10 rows.
EXCEL_PATH = r"C:\Users\hanyuqing\Desktop\最新国家医保ICD编码\最新国家医保ICD编码\ICD-10医保版数据.xlsx"

# File the generated tree is written to (relative to the working directory).
OUTPUT_JSON_PATH = "icd10_tree_with_level.json"
|
||
|
|
|
||
|
|
# === Chapter code normalization ===
# Compiled once at import time instead of on every call.
_CHAPTER_PATTERN = re.compile(r'第([一二三四五六七八九十百\d]+)章')
_CN_DIGITS = {
    '一': 1, '二': 2, '三': 3, '四': 4, '五': 5,
    '六': 6, '七': 7, '八': 8, '九': 9,
}
_CN_UNITS = {'十': 10, '百': 100}


def _parse_chinese_numeral(numeral):
    """Convert a Chinese numeral made of 一..九, 十 and 百 to an int.

    Returns None when the string is empty, contains any other character,
    or is not a well-formed numeral (two digits in a row, units that do
    not strictly decrease, or a bare digit directly after 百, which is
    ambiguous without 零).
    """
    total = 0
    pending = 0       # digit seen but not yet multiplied by a unit
    last_unit = None  # most recent unit consumed; units must shrink
    for ch in numeral:
        if ch in _CN_DIGITS:
            if pending:
                return None
            pending = _CN_DIGITS[ch]
        elif ch in _CN_UNITS:
            unit = _CN_UNITS[ch]
            if last_unit is not None and unit >= last_unit:
                return None
            # A unit with no leading digit counts once ("十" == 10).
            total += (pending or 1) * unit
            pending = 0
            last_unit = unit
        else:
            return None
    if pending and last_unit == 100:
        return None
    return (total + pending) or None


def extract_chapter_number(text):
    """Extract the chapter number from a chapter cell.

    Recognized forms, checked in this order:
      * "第N章" anywhere in the text, where N is a decimal number or a
        Chinese numeral (e.g. "第二十一章", "第3章");
      * a bare decimal number ("7");
      * a decimal number followed by a single dot ("7.").

    Args:
        text: Raw cell value; it is converted with ``str()`` and stripped.

    Returns:
        The chapter number as an int, or None if *text* is falsy or no
        number can be recognized.
    """
    if not text:
        return None
    text = str(text).strip()

    match = _CHAPTER_PATTERN.search(text)
    if match:
        num_str = match.group(1)
        # isdecimal() (not isdigit()) so that only characters int() can
        # actually convert are accepted.
        if num_str.isdecimal():
            return int(num_str)
        parsed = _parse_chinese_numeral(num_str)
        if parsed is not None:
            return parsed

    if text.isdecimal():
        return int(text)
    if text.endswith('.') and text[:-1].isdecimal():
        return int(text[:-1])
    return None
|
||
|
|
|
||
|
|
def format_chapter_code(original):
    """Normalize a chapter cell to the canonical "第N章" form.

    When no chapter number can be extracted, the stripped string form of
    *original* is returned instead, or "" if *original* is falsy.
    """
    chapter_no = extract_chapter_number(original)
    if chapter_no is None:
        if not original:
            return ""
        return str(original).strip()
    return f"第{chapter_no}章"
|
||
|
|
|
||
|
|
def get_clean(val):
    """Return the stripped string form of *val*, or None if it is blank.

    None and any value whose stripped, lower-cased text is "", "nan" or
    "none" are treated as missing.
    """
    if val is None:
        return None
    cleaned = str(val).strip()
    if cleaned.lower() in {"", "nan", "none"}:
        return None
    return cleaned
|
||
|
|
|
||
|
|
def make_fields(code, name):
    """Build the (code, label, title) triple for a tree node.

    Falsy inputs become "". The title is "code name" when both parts are
    present, otherwise whichever part is non-empty (or "" if neither is).
    """
    node_code = code if code else ""
    node_label = name if name else ""
    if node_code and node_label:
        node_title = f"{node_code} {node_label}"
    elif node_code:
        node_title = node_code
    else:
        node_title = node_label
    return node_code, node_label, node_title
|
||
|
|
|
||
|
|
# === Tree node carrying a level tag ===
class TreeNode:
    """A node of the code tree.

    Attributes:
        code: Code text of the node ("" when absent).
        label: Display name of the node.
        title: Combined text stored alongside code and label.
        level: Tag naming the hierarchy level of the node.
        children: Child nodes in insertion order.
    """

    def __init__(self, code="", label="", title="", level=""):
        self.code = code
        self.label = label
        self.title = title
        self.level = level
        self.children = []
        # (code, label) -> child node, so an existing child is found in
        # O(1) instead of by scanning ``children``.
        self._children_by_key = {}

    def add_or_get_child(self, code, label, title, level):
        """Return the child identified by (code, label), creating it if needed.

        ``title`` and ``level`` are only used when a new child is created;
        an existing child is returned unchanged.
        """
        key = (code, label)
        child = self._children_by_key.get(key)
        if child is None:
            child = TreeNode(code=code, label=label, title=title, level=level)
            self.children.append(child)
            self._children_by_key[key] = child
        return child
|
||
|
|
|
||
|
|
# === Build the tree ===
root = TreeNode()
chapter_map = {}  # (chapter_code, chapter_label) -> chapter node

df = pd.read_excel(
    EXCEL_PATH,
    header=1,
    dtype=str,
    engine='openpyxl',
)
df.columns = df.columns.astype(str).str.strip()
df = df.replace({np.nan: None})

# (code column, name column, level tag) for every level below the chapter,
# ordered from the outermost level to the innermost.
_LEVEL_COLUMNS = (
    ("节代码范围", "节名称", "section"),
    ("类目代码", "类目名称", "category"),
    ("亚目代码", "亚目名称", "subcategory"),
    ("条目(诊断)代码", "条目(诊断)名称", "diagnosis"),
)

# Plain dict records avoid building a Series per row as iterrows() does.
for row in df.to_dict("records"):
    raw_chapter = get_clean(row.get("章"))
    chapter_name = get_clean(row.get("章的名称"))

    # Rows with neither a chapter code nor a chapter name are skipped.
    if not raw_chapter and not chapter_name:
        continue

    # === Chapter ===
    chapter_code = format_chapter_code(raw_chapter)
    chap_label = chapter_name or chapter_code
    # Built with make_fields so that an empty chapter code does not leave
    # a leading space in the title.
    chap_title = make_fields(chapter_code, chapter_name)[2]
    chap_key = (chapter_code, chap_label)

    chapter_node = chapter_map.get(chap_key)
    if chapter_node is None:
        chapter_node = TreeNode(
            code=chapter_code,
            label=chap_label,
            title=chap_title,
            level="chapter",
        )
        root.children.append(chapter_node)
        chapter_map[chap_key] = chapter_node

    # === Section / category / subcategory / diagnosis ===
    # A level whose code and name are both blank is skipped, and the next
    # level attaches to the most recent ancestor that was present.
    current = chapter_node
    for code_column, name_column, level in _LEVEL_COLUMNS:
        level_code = get_clean(row.get(code_column))
        level_name = get_clean(row.get(name_column))
        if level_code or level_name:
            node_code, node_label, node_title = make_fields(level_code, level_name)
            current = current.add_or_get_child(node_code, node_label, node_title, level)
|
||
|
|
|
||
|
|
# === Convert to dicts carrying an id ===
next_id = 1  # module-level counter; the next id to hand out


def node_to_dict(node):
    """Convert *node* and its descendants into nested dicts.

    Each dict receives the current value of the module-level ``next_id``
    counter, which is then incremented; a parent is numbered before its
    children. The "children" key is present only when the node has
    children.
    """
    global next_id
    assigned_id = next_id
    next_id = assigned_id + 1

    item = dict(
        id=assigned_id,
        code=node.code,
        label=node.label,
        title=node.title,
        level=node.level,
    )

    if node.children:
        converted = []
        for child in node.children:
            converted.append(node_to_dict(child))
        item["children"] = converted

    return item
|
||
|
|
|
||
|
|
# Convert every chapter under the root, in insertion order.
treeData = []
for chapter in root.children:
    treeData.append(node_to_dict(chapter))

# === Save ===
with open(OUTPUT_JSON_PATH, 'w', encoding='utf-8') as out_file:
    # ensure_ascii=False keeps non-ASCII text readable in the output file.
    json.dump(treeData, out_file, ensure_ascii=False, indent=2)

chapter_total = len(treeData)
print(f"✅ 树形结构已生成,共 {chapter_total} 个章节点")
print(f"📄 输出文件: {OUTPUT_JSON_PATH}")
|