Browse Source

22

yangrongze
hanyuqing 3 months ago
parent
commit
d1974356de
  1. 172
      1218.py
  2. BIN
      icd10_skipped_records.xlsx
  3. 297716
      icd10_tree_preserve_order.json
  4. 343474
      icd10_tree_with_level.json

172
1218.py

@ -0,0 +1,172 @@
import pandas as pd
import json
import numpy as np
import re
# === 配置 ===
# Absolute path of the source Excel workbook. It is machine-specific
# (a Windows desktop folder), so it must be adjusted before running elsewhere.
EXCEL_PATH = r"C:\Users\hanyuqing\Desktop\最新国家医保ICD编码\最新国家医保ICD编码\ICD-10医保版数据.xlsx"
# Destination of the generated JSON tree; a relative path, so it is resolved
# against the current working directory when the file is opened.
OUTPUT_JSON_PATH = "icd10_tree_preserve_order.json"
# === 章 code 格式化函数(同前)===
# Chinese numerals accepted inside a "第…章" chapter heading, mapped to ints.
# Only chapters 1-22 are listed; anything else falls through to the other rules.
_CHINESE_CHAPTER_NUMERALS = {
    '一': 1, '二': 2, '三': 3, '四': 4, '五': 5,
    '六': 6, '七': 7, '八': 8, '九': 9, '十': 10,
    '十一': 11, '十二': 12, '十三': 13, '十四': 14, '十五': 15,
    '十六': 16, '十七': 17, '十八': 18, '十九': 19, '二十': 20,
    '二十一': 21, '二十二': 22,
}


def extract_chapter_number(text):
    """Extract a chapter number from a chapter cell value.

    Recognised forms, tried in this order:

    1. ``第<numeral>章`` anywhere in the text, where the numeral is either a
       Chinese numeral from the 1-22 table or a run of decimal digits.
    2. The whole (stripped) text is a run of decimal digits, e.g. ``"12"``.
    3. Decimal digits followed by a single trailing dot, e.g. ``"7."``.

    Args:
        text: Raw cell value; converted with ``str()`` and stripped first.

    Returns:
        The chapter number as ``int``, or ``None`` when ``text`` is falsy or
        matches none of the forms above.
    """
    if not text:
        return None
    text = str(text).strip()

    match = re.search(r'第([一二三四五六七八九十百\d]+)章', text)
    if match:
        num_str = match.group(1)
        if num_str in _CHINESE_CHAPTER_NUMERALS:
            return _CHINESE_CHAPTER_NUMERALS[num_str]
        # isdecimal() (not isdigit()) so that every accepted string is also
        # parseable by int(); isdigit() admits characters such as "²".
        if num_str.isdecimal():
            return int(num_str)
        # Unknown numeral (e.g. beyond 22): fall through to the plain rules.

    if text.isdecimal():
        return int(text)
    if text.endswith('.') and text[:-1].isdecimal():
        return int(text[:-1])
    return None
def format_chapter_code(original):
    """Normalise a raw chapter cell into the chapter code string.

    When a chapter number can be extracted, its decimal representation is
    returned. Otherwise the stripped string form of ``original`` is returned,
    or an empty string when ``original`` is falsy.
    """
    chapter_no = extract_chapter_number(original)
    if chapter_no is None:
        # No recognisable number: keep whatever text was supplied.
        if original:
            return str(original).strip()
        return ""
    return str(chapter_no)
# === 工具函数 ===
def get_clean(val):
    """Return the stripped string form of ``val``, or ``None`` for blanks.

    ``None`` and any value whose stripped, lower-cased string form is empty,
    ``"nan"`` or ``"none"`` are treated as missing.
    """
    if val is None:
        return None
    cleaned = str(val).strip()
    if cleaned.lower() in ("", "nan", "none"):
        return None
    return cleaned
def make_fields(code, name):
    """Build the ``(code, label, title)`` triple for a tree node.

    Falsy inputs become empty strings. The title is ``"<code> <label>"`` when
    both parts are present, otherwise whichever part is non-empty (or ``""``).
    """
    code_str = code if code else ""
    label_str = name if name else ""
    if code_str and label_str:
        title_str = f"{code_str} {label_str}"
    else:
        title_str = code_str or label_str
    return code_str, label_str, title_str
# === 构建有序树:每个节点的 children 是 list ===
class TreeNode:
    """A node of the ordered tree.

    Children are kept in a list so that insertion order is preserved, and are
    additionally indexed by ``(code, label)`` so that an existing child can be
    found without scanning the list.
    """

    def __init__(self, code="", label="", title=""):
        self.code = code
        self.label = label
        self.title = title
        self.children = []  # list keeps the insertion order of children
        self._child_by_key = {}  # (code, label) -> child node, for O(1) de-duplication

    def add_or_get_child(self, code, label, title):
        """Return the child identified by ``(code, label)``, creating it if needed.

        A new child is appended to the end of ``children``. When a child with
        the same ``(code, label)`` already exists it is returned unchanged and
        ``title`` is ignored.
        """
        key = (code, label)
        child = self._child_by_key.get(key)
        if child is None:
            child = TreeNode(code=code, label=label, title=title)
            self.children.append(child)
            self._child_by_key[key] = child
        return child
# === 构建根节点 ===
# Synthetic root: chapter nodes are appended to root.children, which the
# TreeNode constructor already initialises to an empty list.
root = TreeNode()
# (code, label) -> chapter TreeNode, so each chapter is created only once.
chapter_map = dict()
# === 读取 Excel ===
# Load the workbook with every cell as text; header=1 takes the column names
# from the second row of the sheet.
df = pd.read_excel(EXCEL_PATH, header=1, dtype=str, engine='openpyxl')
# Normalise the column names to stripped strings.
df.columns = df.columns.astype(str).str.strip()
# Turn NaN cells into None.
df = df.replace({np.nan: None})
# (code column, name column) for every level below the chapter, outermost
# first: section, category, subcategory, diagnosis entry.
LEVEL_COLUMNS = (
    ("节代码范围", "节名称"),
    ("类目代码", "类目名称"),
    ("亚目代码", "亚目名称"),
    ("条目(诊断)代码", "条目(诊断)名称"),
)

# Walk the sheet top to bottom and hang every row's path under the root.
for _, row in df.iterrows():
    # NOTE(review): the chapter-code column name was an empty string here, which
    # can never match a column and made every chapter code empty. "章" is
    # assumed from the sibling column "章的名称" — confirm against the workbook.
    raw_chapter = get_clean(row.get("章"))
    chapter_name = get_clean(row.get("章的名称"))

    # Rows without any chapter information are skipped.
    if not raw_chapter and not chapter_name:
        continue

    # === Chapter ===
    chapter_code = format_chapter_code(raw_chapter)
    chap_label = chapter_name or chapter_code
    chap_title = f"{chapter_code} {chapter_name}" if chapter_name else chapter_code
    chap_key = (chapter_code, chap_label)
    chapter_node = chapter_map.get(chap_key)
    if chapter_node is None:
        chapter_node = TreeNode(code=chapter_code, label=chap_label, title=chap_title)
        root.children.append(chapter_node)
        chapter_map[chap_key] = chapter_node

    # === Section -> category -> subcategory -> diagnosis entry ===
    # A level with neither code nor name is skipped, so the next present
    # level attaches directly to the closest present ancestor.
    current = chapter_node
    for code_column, name_column in LEVEL_COLUMNS:
        level_code = get_clean(row.get(code_column))
        level_name = get_clean(row.get(name_column))
        if level_code or level_name:
            node_code, node_label, node_title = make_fields(level_code, level_name)
            current = current.add_or_get_child(node_code, node_label, node_title)
# === 转为带 id 的标准格式(DFS 顺序遍历)===
# Next id to hand out; ids are assigned in depth-first pre-order.
next_id = 1


def node_to_dict(node):
    """Convert ``node`` and its descendants into nested plain dicts.

    Each dict carries ``id``, ``code``, ``label`` and ``title``; a ``children``
    list is added only when the node has children. Ids come from the
    module-level ``next_id`` counter, and a node takes its id before any of
    its children do.
    """
    global next_id
    assigned_id = next_id
    next_id = assigned_id + 1
    entry = dict(id=assigned_id, code=node.code, label=node.label, title=node.title)
    kids = node.children
    if kids:
        entry["children"] = list(map(node_to_dict, kids))
    return entry
# One dict per chapter, in sheet order.
treeData = list(map(node_to_dict, root.children))
# === Save JSON ===
with open(OUTPUT_JSON_PATH, 'w', encoding='utf-8') as out_file:
    out_file.write(json.dumps(treeData, ensure_ascii=False, indent=2))
chapter_count = len(treeData)
print(f"✅ 树形结构已生成,共 {chapter_count} 个章节点")
print(f"📄 输出文件: {OUTPUT_JSON_PATH}")

BIN
icd10_skipped_records.xlsx

Binary file not shown.

297716
icd10_tree_preserve_order.json

File diff suppressed because it is too large

343474
icd10_tree_with_level.json

File diff suppressed because it is too large
Loading…
Cancel
Save