13 changed files with 345005 additions and 176 deletions
@ -0,0 +1,165 @@ |
|||
import json |
|||
import re |
|||
import os |
|||
from neo4j import GraphDatabase |
|||
|
|||
# === Configuration ===
NEO4J_URI = "bolt://localhost:7687"  # Bolt endpoint of the local Neo4j instance
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "12345678"  # 👈 make sure this password is correct
RELATIONSHIP_FOLDER = r"D:\temp\669"  # folder containing the relations_*.json files
BATCH_SIZE = 100  # relationships sent to Neo4j per UNWIND batch
|||
|
|||
|
|||
def sanitize_relationship_type(rel_type: str) -> str:
    """Normalize *rel_type* into a legal Neo4j relationship-type identifier.

    Keeps only ASCII letters, digits and underscores; if the result is
    empty or starts with a digit it is prefixed with ``REL_``.
    """
    if not isinstance(rel_type, str):
        rel_type = str(rel_type)
    # \W under re.ASCII is exactly [^a-zA-Z0-9_]
    cleaned = re.sub(r"\W", "", rel_type, flags=re.ASCII)
    if cleaned == "" or cleaned[0].isdigit():
        cleaned = f"REL_{cleaned}"
    return cleaned if cleaned else "RELATED"
|||
|
|||
|
|||
def extract_start_end(rel: dict):
    """Return the (start, end) endpoints of a relationship dict.

    Accepts several field-name conventions — start/end, source/target,
    from/to — and returns the first complete pair found, or
    ``(None, None)`` when no pair is present.
    """
    key_pairs = (("start", "end"), ("source", "target"), ("from", "to"))
    for start_key, end_key in key_pairs:
        start, end = rel.get(start_key), rel.get(end_key)
        if start is None or end is None:
            continue
        return start, end
    return None, None
|||
|
|||
|
|||
def load_relationships_from_file(filepath):
    """Load the relationship records from one JSON file.

    Supported layouts:
      * a list of ``{"relationships": [...]}`` wrappers (all extended in order),
      * a plain list of relationship dicts,
      * a single dict with a top-level ``"relationships"`` key.

    Any other payload yields an empty list.

    Args:
        filepath: path to a UTF-8 (optionally BOM-prefixed) JSON file.

    Returns:
        list of relationship dicts (possibly empty).
    """
    # utf-8-sig tolerates a leading BOM, which Windows tools often emit.
    with open(filepath, "r", encoding="utf-8-sig") as f:
        data = json.load(f)

    relationships = []
    if isinstance(data, list):
        for item in data:
            if isinstance(item, dict) and "relationships" in item:
                relationships.extend(item["relationships"])
            elif isinstance(item, dict):
                relationships.append(item)
            # non-dict list entries are silently ignored
    elif isinstance(data, dict) and "relationships" in data:
        relationships = data["relationships"]
    # Fix: the original trailing branch re-tested isinstance(data, list),
    # which is unreachable here (lists are handled above); any other
    # payload simply contributes no relationships.
    return relationships
|||
|
|||
|
|||
def process_relationships(relationships):
    """Validate raw relationship dicts and normalise them for import.

    Drops entries without a resolvable start/end pair or with endpoint
    ids that cannot be coerced to int; sanitises the relationship type
    and keeps the properties dict (defaulting to ``{}``).
    """
    cleaned = []
    for raw in relationships:
        start_raw, end_raw = extract_start_end(raw)
        if start_raw is None or end_raw is None:
            continue  # no usable endpoint pair under any known field names

        try:
            # float() first so "12.0"-style string ids still convert
            start_node = int(float(start_raw))
            end_node = int(float(end_raw))
        except (TypeError, ValueError):
            continue  # non-numeric id -> skip the record

        cleaned.append(
            {
                "start": start_node,
                "end": end_node,
                "type": sanitize_relationship_type(raw.get("type", "RELATED")),
                "props": raw.get("properties", {}) or {},
            }
        )
    return cleaned
|||
|
|||
|
|||
def import_relationships_in_batches(tx, rels, batch_size):
    """Create relationships in Neo4j in batches, grouped by type.

    Runs inside one managed transaction (*tx*).  Relationships are
    grouped per sanitized type because a relationship type cannot be a
    Cypher parameter and must be spliced into the query text.

    Args:
        tx: Neo4j managed transaction object.
        rels: cleaned relationship dicts with "start"/"end"/"type"/"props".
        batch_size: number of relationships per UNWIND batch.

    Returns:
        Total relationship count reported by the MERGE queries.
    """
    total = len(rels)
    created_total = 0

    for i in range(0, total, batch_size):
        batch = rels[i:i + batch_size]
        # Group the batch by relationship type: one Cypher statement per type.
        rel_groups = {}
        for rel in batch:
            rel_groups.setdefault(rel["type"], []).append({
                "start": rel["start"],
                "end": rel["end"],
                "props": rel["props"]
            })

        created_this_batch = 0
        for rel_type, group in rel_groups.items():
            # NOTE(review): nodes are matched by a bare nodeId property with
            # no label — this assumes nodeId is unique (and ideally indexed)
            # across the whole graph; confirm against the node-import script.
            # The WITH/WHERE null check is redundant after MATCH but harmless.
            cypher = f"""
            UNWIND $rels AS r
            MATCH (a {{nodeId: r.start}})
            MATCH (b {{nodeId: r.end}})
            WITH a, b, r
            WHERE a IS NOT NULL AND b IS NOT NULL
            MERGE (a)-[rel:`{rel_type}`]->(b)
            SET rel += r.props
            RETURN count(rel) AS c
            """
            # count(rel) always yields exactly one row, so .single() is safe.
            result = tx.run(cypher, rels=group).single()
            created_this_batch += result["c"]

        created_total += created_this_batch
        print(f" ➤ 本批创建关系: {created_this_batch} 条")

    return created_total
|||
|
|||
|
|||
def main():
    """Import every relations_*.json file in RELATIONSHIP_FOLDER into Neo4j.

    Files are processed one by one in lexicographic order; a failure in
    one file is logged and the run continues with the next file.
    """
    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
    try:
        # Collect the relationship files and sort by name so that
        # relations_001.json is imported before relations_002.json, etc.
        json_files = [
            f for f in os.listdir(RELATIONSHIP_FOLDER)
            if f.startswith("relations_") and f.endswith(".json")
        ]
        json_files.sort()  # lexicographic order works for zero-padded suffixes

        if not json_files:
            print("❌ 文件夹中没有找到 relations_*.json 文件")
            return

        total_global_created = 0
        total_global_processed = 0

        print(f"📁 找到 {len(json_files)} 个关系文件,开始逐个导入...\n")

        for idx, filename in enumerate(json_files, 1):
            filepath = os.path.join(RELATIONSHIP_FOLDER, filename)
            # Fix: the progress messages previously printed a literal
            # placeholder instead of the file actually being processed.
            print(f"\n📄 [{idx}/{len(json_files)}] 正在处理: {filename}")

            try:
                raw_rels = load_relationships_from_file(filepath)
                print(f" ➤ 原始关系数: {len(raw_rels)}")

                valid_rels = process_relationships(raw_rels)
                print(f" ➤ 有效关系数: {len(valid_rels)}")

                if not valid_rels:
                    print(" ⚠️ 跳过:无有效关系")
                    continue

                with driver.session() as session:
                    created = session.execute_write(
                        import_relationships_in_batches, valid_rels, BATCH_SIZE
                    )

                total_global_created += created
                total_global_processed += len(valid_rels)
                print(f" ✅ 文件 {filename} 导入完成,创建 {created} 条关系")

            except Exception as e:
                # Keep going: one bad file must not abort the whole run.
                print(f" ❌ 处理 {filename} 时出错: {e}")
                continue

        print("\n" + "=" * 60)
        print("🎉 全部导入完成!")
        print(f"📊 总共处理有效关系: {total_global_processed}")
        print(f"✅ 总共成功创建关系: {total_global_created}")
    finally:
        # Fix: close the driver even on the early return or when an
        # unexpected error escapes the per-file handler (the original
        # leaked the connection in both cases).
        driver.close()
|||
@ -0,0 +1,100 @@ |
|||
import pandas as pd |
|||
from neo4j import GraphDatabase |
|||
import numpy as np |
|||
|
|||
# === Configuration ===
EXCEL_PATH = r"C:\Users\hanyuqing\Desktop\最新国家医保ICD编码\最新国家医保ICD编码\ICD-10医保版数据.xlsx"
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "12345678"

# === Read the Excel workbook ===
df = pd.read_excel(
    EXCEL_PATH,
    header=1,  # the real column headers live on the second sheet row
    dtype=str,  # crucial: keep codes textual so "A00.0" does not become "A00"
    engine='openpyxl'
)

# Normalise column names (strip stray whitespace).
df.columns = df.columns.astype(str).str.strip()

# Columns the import logic depends on.
required_cols = [
    "条目(诊断)名称", "条目(诊断)代码",
    "亚目名称", "亚目代码",
    "章代码范围", "节代码范围", "类目代码"
]

# Fail fast when the workbook layout has changed.
missing = [col for col in required_cols if col not in df.columns]
if missing:
    raise ValueError(f"缺少必要列: {missing}")

# Replace NaN with None so emptiness checks below are uniform.
df = df.replace({np.nan: None})
|||
|
|||
# === Build the name and code fields ===
def get_disease_name(row):
    """Disease display name: diagnosis name, else fall back to the sub-category name."""
    diag_name = row["条目(诊断)名称"]
    if diag_name and str(diag_name).strip():
        return str(diag_name).strip()
    # Diagnosis name is missing/blank: use the sub-category name instead.
    fallback = row["亚目名称"]
    return str(fallback).strip() if fallback else None
|||
|
|||
def get_diagnosis_code(row):
    """Diagnosis code as stripped text; empty string when the cell is blank."""
    raw = row["条目(诊断)代码"]
    return str(raw).strip() if raw else ""
|||
|
|||
# Derive the two working columns used by the import.
df["_disease_name"] = df.apply(get_disease_name, axis=1)
df["_diagnosis_code"] = df.apply(get_diagnosis_code, axis=1)

# Drop rows where the name is still missing (both the diagnosis name and
# the sub-category name were empty).
df = df[df["_disease_name"].notna() & (df["_disease_name"] != "")]

print(f"✅ 共准备 {len(df)} 条疾病记录用于导入")

# === Neo4j connection ===
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
|||
|
|||
def upsert_disease(tx, record):
    """Create or update one :Disease node, keyed by its name.

    The node is matched on ``name`` alone; every code/range property is
    (re)written on each call, so re-running the import refreshes them.

    Args:
        tx: Neo4j managed transaction.
        record: one DataFrame row as a dict (from ``row.to_dict()``).
    """
    # Fix: the original duplicated byte-identical ON CREATE SET and
    # ON MATCH SET clauses; one unconditional SET after MERGE is equivalent.
    cypher = """
    MERGE (d:Disease {name: $name})
    SET
        d.diagnosisCode = $diagnosisCode,
        d.chapterRange = $chapterRange,
        d.sectionRange = $sectionRange,
        d.categoryCode = $categoryCode,
        d.subcategoryCode = $subcategoryCode
    """
    tx.run(cypher, {
        "name": record["_disease_name"],
        "diagnosisCode": record["_diagnosis_code"],
        # Blank cells come through as None; store "" instead.
        "chapterRange": record.get("章代码范围") or "",
        "sectionRange": record.get("节代码范围") or "",
        "categoryCode": record.get("类目代码") or "",
        "subcategoryCode": record.get("亚目代码") or ""
    })
|||
|
|||
# === Row-by-row import ===
with driver.session() as session:
    for idx, row in df.iterrows():
        try:
            session.execute_write(upsert_disease, row.to_dict())
        except Exception as e:
            # NOTE(review): with header=1 the matching sheet row is
            # probably idx + 3 (two header rows consumed, 1-based Excel
            # numbering), and idx is the pre-filter index label — confirm
            # against the workbook before trusting this number.
            print(f"❌ 第 {idx + 2} 行失败: {e}")
            continue

print("✅ 数据导入完成!")
driver.close()
|||
@ -0,0 +1,173 @@ |
|||
import pandas as pd |
|||
import json |
|||
import numpy as np |
|||
import re |
|||
|
|||
# === Configuration ===
EXCEL_PATH = r"C:\Users\hanyuqing\Desktop\最新国家医保ICD编码\最新国家医保ICD编码\ICD-10医保版数据.xlsx"
OUTPUT_JSON_PATH = "icd10_tree_with_level.json"  # tree output consumed downstream
|||
|
|||
# === Chapter-code formatting ===
def extract_chapter_number(text):
    """Parse a chapter number from values like '第十一章', '第3章', '7' or '7.'.

    Returns an int, or None when no number can be recognised.
    """
    if not text:
        return None
    text = str(text).strip()

    chinese_numerals = {
        '一': 1, '二': 2, '三': 3, '四': 4, '五': 5,
        '六': 6, '七': 7, '八': 8, '九': 9, '十': 10,
        '十一': 11, '十二': 12, '十三': 13, '十四': 14, '十五': 15,
        '十六': 16, '十七': 17, '十八': 18, '十九': 19, '二十': 20,
        '二十一': 21, '二十二': 22,
    }

    m = re.search(r'第([一二三四五六七八九十百\d]+)章', text)
    if m:
        token = m.group(1)
        if token in chinese_numerals:
            return chinese_numerals[token]
        if token.isdigit():
            return int(token)

    # Bare numerals, possibly with a trailing dot (an Excel artefact).
    if text.isdigit():
        return int(text)
    if text.endswith('.') and text[:-1].isdigit():
        return int(text[:-1])
    return None
|||
|
|||
def format_chapter_code(original):
    """Canonicalise a chapter label to '第N章'; fall back to the stripped input."""
    number = extract_chapter_number(original)
    if number is None:
        return str(original).strip() if original else ""
    return f"第{number}章"
|||
|
|||
def get_clean(val):
    """Stripped string form of *val*, or None for blank/NaN-like cells."""
    if val is None:
        return None
    text = str(val).strip()
    if text.lower() in ("", "nan", "none"):
        return None
    return text
|||
|
|||
def make_fields(code, name):
    """Return (code, label, title) strings; title joins code and label when both exist."""
    code_text = code or ""
    label_text = name or ""
    if code_text and label_text:
        title = f"{code_text} {label_text}"
    else:
        title = code_text or label_text
    return code_text, label_text, title
|||
|
|||
# === Tree node carrying a level tag ===
class TreeNode:
    """One node of the ICD-10 hierarchy."""

    def __init__(self, code="", label="", title="", level=""):
        self.code = code
        self.label = label
        self.title = title
        # Hierarchy level tag: "chapter", "section", "category",
        # "subcategory" or "diagnosis" ("" for the synthetic root).
        self.level = level
        self.children = []
        # Fix: map (code, label) -> child for O(1) de-duplicated lookup.
        # The original kept only a key set and then re-scanned the children
        # list linearly, with an unreachable fall-through returning None.
        self._children_by_key = {}

    def add_or_get_child(self, code, label, title, level):
        """Return the child identified by (code, label), creating it if absent."""
        key = (code, label)
        child = self._children_by_key.get(key)
        if child is None:
            child = TreeNode(code=code, label=label, title=title, level=level)
            self.children.append(child)
            self._children_by_key[key] = child
        return child
|||
|
|||
# === Build the tree ===
root = TreeNode()   # synthetic root; its children become the chapter nodes
chapter_map = {}    # (chapter_code, chapter_label) -> chapter TreeNode

df = pd.read_excel(
    EXCEL_PATH,
    header=1,  # real headers are on the second sheet row
    dtype=str,  # keep codes textual so "A00.0" survives intact
    engine='openpyxl'
)
df.columns = df.columns.astype(str).str.strip()
df = df.replace({np.nan: None})  # uniform None for empty cells
|||
|
|||
for idx, row in df.iterrows():
    # Pull every hierarchy column for this row; get_clean turns
    # blank / NaN-like cells into None.
    raw_chapter = get_clean(row.get("章"))
    chapter_name = get_clean(row.get("章的名称"))

    section_code = get_clean(row.get("节代码范围"))
    section_name = get_clean(row.get("节名称"))

    category_code = get_clean(row.get("类目代码"))
    category_name = get_clean(row.get("类目名称"))

    subcategory_code = get_clean(row.get("亚目代码"))
    subcategory_name = get_clean(row.get("亚目名称"))

    diagnosis_code = get_clean(row.get("条目(诊断)代码"))
    diagnosis_name = get_clean(row.get("条目(诊断)名称"))

    # A row with no chapter information cannot be placed in the tree.
    if not raw_chapter and not chapter_name:
        continue

    # === Chapter ===
    chapter_code = format_chapter_code(raw_chapter)
    chap_label = chapter_name or chapter_code
    chap_title = f"{chapter_code} {chapter_name}" if chapter_name else chapter_code
    chap_key = (chapter_code, chap_label)

    # De-duplicate chapters via chapter_map instead of scanning root.
    if chap_key not in chapter_map:
        chapter_node = TreeNode(
            code=chapter_code,
            label=chap_label,
            title=chap_title,
            level="chapter"
        )
        root.children.append(chapter_node)
        chapter_map[chap_key] = chapter_node
    else:
        chapter_node = chapter_map[chap_key]

    # Descend level by level; each level present in this row becomes the
    # parent of the next one.
    current = chapter_node

    # === Section ===
    if section_code or section_name:
        sec_code, sec_label, sec_title = make_fields(section_code, section_name)
        current = current.add_or_get_child(sec_code, sec_label, sec_title, "section")

    # === Category ===
    if category_code or category_name:
        cat_code, cat_label, cat_title = make_fields(category_code, category_name)
        current = current.add_or_get_child(cat_code, cat_label, cat_title, "category")

    # === Subcategory ===
    if subcategory_code or subcategory_name:
        sub_code, sub_label, sub_title = make_fields(subcategory_code, subcategory_name)
        current = current.add_or_get_child(sub_code, sub_label, sub_title, "subcategory")

    # === Diagnosis entry (leaf) ===
    if diagnosis_code or diagnosis_name:
        diag_code, diag_label, diag_title = make_fields(diagnosis_code, diagnosis_name)
        current.add_or_get_child(diag_code, diag_label, diag_title, "diagnosis")
|||
|
|||
# === Serialise the tree to dicts, assigning sequential ids ===
next_id = 1


def node_to_dict(node):
    """Convert a TreeNode subtree to a plain dict, numbering nodes depth-first.

    Uses the module-level ``next_id`` counter so ids are unique across
    the whole export; leaves omit the "children" key entirely.
    """
    global next_id
    entry = {
        "id": next_id,
        "code": node.code,
        "label": node.label,
        "title": node.title,
        "level": node.level,
    }
    next_id += 1

    if node.children:
        entry["children"] = [node_to_dict(child) for child in node.children]
    return entry
|||
|
|||
# The chapters become the top-level entries of the exported tree.
treeData = [node_to_dict(chap) for chap in root.children]

# === Save ===
with open(OUTPUT_JSON_PATH, 'w', encoding='utf-8') as f:
    json.dump(treeData, f, ensure_ascii=False, indent=2)  # keep CJK text readable

print(f"✅ 树形结构已生成,共 {len(treeData)} 个章节点")
print(f"📄 输出文件: {OUTPUT_JSON_PATH}")
|||
File diff suppressed because it is too large
@ -1,4 +1,8 @@ |
|||
import { createApp } from 'vue'
import App from './App.vue'
import ElementPlus from 'element-plus'
import 'element-plus/dist/index.css'

// Fix: the app was previously created and mounted twice — the first
// instance without Element Plus registered. Create a single app,
// register the plugin, then mount it once.
const app = createApp(App)
app.use(ElementPlus)
app.mount('#app')
|||
|
|||
Loading…
Reference in new issue