22

3 months ago · d1974356de
4 changed files with 641362 additions and 0 deletions
--- a/1218.py
+++ b/1218.py
@ -0,0 +1,172 @@
+import pandas as pd
+import json
+import numpy as np
+import re
+
+# === Configuration ===
+# Location of the source workbook that pd.read_excel() loads further down.
+EXCEL_PATH = r"C:\Users\hanyuqing\Desktop\最新国家医保ICD编码\最新国家医保ICD编码\ICD-10医保版数据.xlsx"
+# File the generated JSON tree is written to at the end of the script.
+OUTPUT_JSON_PATH = "icd10_tree_preserve_order.json"
+
+# === 章 code 格式化函数（同前）===
+# Lookup tables and the chapter pattern are built once at import time
+# instead of on every call.
+_CN_DIGITS = {
+    '一': 1, '二': 2, '三': 3, '四': 4, '五': 5,
+    '六': 6, '七': 7, '八': 8, '九': 9,
+}
+_CN_UNITS = {'十': 10, '百': 100}
+_CHAPTER_RE = re.compile(r'第([一二三四五六七八九十百零\d]+)章')
+
+
+def _chinese_numeral_to_int(numeral):
+    """Convert a Chinese numeral in the range 1-999 to an int.
+
+    Accepts forms such as 九, 十, 十二, 二十, 二十一, 一百, 一百零五 and
+    一百二十三. Returns None for anything malformed (two digits in a
+    row, units out of order, a dangling 零, unknown characters).
+    """
+    total = 0
+    pending = None        # digit read but not yet attached to a unit
+    last_unit = 1000      # units must strictly decrease from left to right
+    zero_pending = False  # a 零 has been read and still needs its digit
+    for ch in numeral:
+        if ch in _CN_DIGITS:
+            if pending is not None:
+                return None
+            pending = _CN_DIGITS[ch]
+            zero_pending = False
+        elif ch in _CN_UNITS:
+            unit = _CN_UNITS[ch]
+            if zero_pending or unit >= last_unit:
+                return None
+            if pending is None:
+                # Only a leading 十 may omit its multiplier (十二 == 12).
+                if unit != 10 or total != 0:
+                    return None
+                pending = 1
+            total += pending * unit
+            pending = None
+            last_unit = unit
+        elif ch == '零':
+            # 零 only fills the skipped tens place right after 百.
+            if pending is not None or last_unit != 100:
+                return None
+            zero_pending = True
+            last_unit = 10
+        else:
+            return None
+    if zero_pending:
+        return None
+    value = total + (pending or 0)
+    return value if value > 0 else None
+
+
+def extract_chapter_number(text):
+    """Extract the chapter number from a chapter cell.
+
+    Recognised inputs, in order of precedence:
+      * text containing "第N章", where N is decimal digits or a Chinese
+        numeral (1-999);
+      * a bare decimal number such as "12";
+      * a decimal number followed by a dot such as "12.".
+
+    Args:
+        text: Raw cell value; non-strings are converted with str().
+
+    Returns:
+        The chapter number as an int, or None when the value is falsy or
+        no number can be recognised.
+    """
+    if not text:
+        return None
+    text = str(text).strip()
+    match = _CHAPTER_RE.search(text)
+    if match:
+        num_str = match.group(1)
+        if num_str.isdecimal():
+            return int(num_str)
+        value = _chinese_numeral_to_int(num_str)
+        if value is not None:
+            return value
+    # isdecimal() rather than isdigit(): isdigit() also accepts characters
+    # such as "²" that int() cannot parse, which would raise ValueError.
+    if text.isdecimal():
+        return int(text)
+    if text.endswith('.') and text[:-1].isdecimal():
+        return int(text[:-1])
+    return None
+
+def format_chapter_code(original):
+    """Normalise a chapter cell to the canonical "第N章" form.
+
+    When a chapter number can be extracted, the result is "第<number>章"
+    with an Arabic number. Otherwise the stripped string form of the
+    input is returned, or "" when the input is falsy.
+    """
+    chapter_no = extract_chapter_number(original)
+    if chapter_no is None:
+        if not original:
+            return ""
+        return str(original).strip()
+    return f"第{chapter_no}章"
+
+# === 工具函数 ===
+def get_clean(val):
+    """Return the stripped string form of a cell, or None when it is blank.
+
+    A value counts as blank when it is None or when its stripped text is
+    empty or equals "nan" / "none" in any letter case.
+    """
+    if val is None:
+        return None
+    cleaned = str(val).strip()
+    if cleaned.lower() in {"", "nan", "none"}:
+        return None
+    return cleaned
+
+def make_fields(code, name):
+    """Build the (code, label, title) triple for a tree node.
+
+    Falsy inputs become "". The title is "<code> <label>" when both
+    parts are non-empty, otherwise whichever part is non-empty (or "").
+    """
+    code_str = "" if not code else code
+    label_str = "" if not name else name
+    if code_str and label_str:
+        title_str = f"{code_str} {label_str}"
+    else:
+        title_str = code_str or label_str
+    return code_str, label_str, title_str
+
+# === 构建有序树：每个节点的 children 是 list ===
+class TreeNode:
+    """A node of the code tree whose children keep insertion order.
+
+    Attributes:
+        code: Code string of this node.
+        label: Name shown for this node.
+        title: Combined display text.
+        children: Child nodes in the order they were first added.
+    """
+
+    def __init__(self, code="", label="", title=""):
+        self.code = code
+        self.label = label
+        self.title = title
+        self.children = []  # a list, so insertion order is preserved
+        # (code, label) -> child node, so a repeated key is found without
+        # scanning the children list.
+        self._child_index = {}
+
+    def add_or_get_child(self, code, label, title):
+        """Return the child identified by (code, label), creating it if new.
+
+        A new child is appended to the end of ``children``. When the key
+        already exists, the existing node is returned unchanged and the
+        ``title`` argument is ignored.
+        """
+        key = (code, label)
+        child = self._child_index.get(key)
+        if child is None:
+            child = TreeNode(code=code, label=label, title=title)
+            self.children.append(child)
+            self._child_index[key] = child
+        return child
+
+# === Build the root node ===
+# Unlabeled root; chapter nodes are appended to root.children.
+# TreeNode() already starts with an empty children list, so it is not
+# reassigned here.
+root = TreeNode()
+
+# (chapter code, chapter label) -> TreeNode, so each chapter is created once.
+chapter_map = {}
+
+# === Read the Excel workbook ===
+df = pd.read_excel(
+    EXCEL_PATH,
+    header=1,  # column labels are taken from the second row (0-indexed)
+    dtype=str,  # cells are read as strings rather than inferred types
+    engine='openpyxl'
+)
+# Strip surrounding whitespace from the column labels; the loop below
+# looks columns up by their exact names.
+df.columns = df.columns.astype(str).str.strip()
+# Turn NaN cells into None before the rows are cleaned.
+df = df.replace({np.nan: None})
+
+# Hierarchy levels below the chapter, outermost first, as
+# (code column, name column) pairs: section, category, subcategory,
+# diagnosis entry.
+LEVEL_COLUMNS = (
+    ("节代码范围", "节名称"),
+    ("类目代码", "类目名称"),
+    ("亚目代码", "亚目名称"),
+    ("条目（诊断）代码", "条目（诊断）名称"),
+)
+
+for _, record in df.iterrows():
+    raw_chapter = get_clean(record.get("章"))
+    chapter_name = get_clean(record.get("章的名称"))
+
+    # Rows that carry neither a chapter code nor a chapter name are skipped.
+    if not (raw_chapter or chapter_name):
+        continue
+
+    # --- Chapter: reuse the node when this (code, label) was seen before ---
+    chapter_code = format_chapter_code(raw_chapter)
+    chap_label = chapter_name or chapter_code
+    chap_title = chapter_code if not chapter_name else f"{chapter_code} {chapter_name}"
+    chap_key = (chapter_code, chap_label)
+
+    chapter_node = chapter_map.get(chap_key)
+    if chapter_node is None:
+        chapter_node = TreeNode(code=chapter_code, label=chap_label, title=chap_title)
+        root.children.append(chapter_node)
+        chapter_map[chap_key] = chapter_node
+
+    # --- Deeper levels: descend one step for every level present in the row.
+    # A level whose code and name are both blank is skipped, so the next
+    # level attaches to the nearest ancestor that is present.
+    current = chapter_node
+    for code_column, name_column in LEVEL_COLUMNS:
+        level_code = get_clean(record.get(code_column))
+        level_name = get_clean(record.get(name_column))
+        if level_code or level_name:
+            current = current.add_or_get_child(*make_fields(level_code, level_name))
+
+# === 转为带 id 的标准格式（DFS 顺序遍历）===
+# Next id to hand out; ids are assigned in depth-first, parent-first order.
+next_id = 1
+
+
+def node_to_dict(node):
+    """Convert a node and its descendants into nested dicts.
+
+    Each dict has the keys "id", "code", "label" and "title". A
+    "children" list is added only when the node has children. Ids come
+    from the module-level ``next_id`` counter, which this function
+    advances: a parent gets its id before any of its descendants.
+    """
+    global next_id
+    assigned_id = next_id
+    next_id = assigned_id + 1
+
+    item = {
+        "id": assigned_id,
+        "code": node.code,
+        "label": node.label,
+        "title": node.title,
+    }
+
+    descendants = node.children
+    if descendants:
+        item["children"] = list(map(node_to_dict, descendants))
+
+    return item
+
+# Chapters become the top-level entries of the output; the root itself
+# is not emitted.
+treeData = list(map(node_to_dict, root.children))
+
+# === Save as JSON ===
+with open(OUTPUT_JSON_PATH, 'w', encoding='utf-8') as out_file:
+    # ensure_ascii=False writes the Chinese text as-is instead of \u escapes.
+    out_file.write(json.dumps(treeData, ensure_ascii=False, indent=2))
+
+print(f"✅ 树形结构已生成，共 {len(treeData)} 个章节点")
+print(f"📄 输出文件: {OUTPUT_JSON_PATH}")
--- a/icd10_skipped_records.xlsx
+++ b/icd10_skipped_records.xlsx
--- a/icd10_tree_preserve_order.json
+++ b/icd10_tree_preserve_order.json
--- a/icd10_tree_with_level.json
+++ b/icd10_tree_with_level.json