"""Import relationship records from ``relations_*.json`` files into Neo4j."""

import json
import os
import re

try:
    from neo4j import GraphDatabase
except ImportError:
    # Keep the pure parsing helpers importable without the driver installed;
    # main() raises a clear ImportError when the driver is actually needed.
    GraphDatabase = None

# === Configuration ===
# Connection settings may be overridden through environment variables so the
# credentials do not have to live in the source file.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "12345678")  # make sure the password is correct
RELATIONSHIP_FOLDER = r"D:\temp\669"
BATCH_SIZE = 100

_DEFAULT_REL_TYPE = "RELATED"
_ENDPOINT_KEYS = (("start", "end"), ("source", "target"), ("from", "to"))
_INVALID_TYPE_CHARS = re.compile(r"[^a-zA-Z0-9_]")


def sanitize_relationship_type(rel_type: str) -> str:
    """Return a relationship type that is safe to embed in a Cypher query.

    Every character outside ``[a-zA-Z0-9_]`` is removed. A result that starts
    with a digit is prefixed with ``REL_``. ``None`` and values that end up
    empty after cleaning fall back to ``"RELATED"``.

    NOTE: the filter is ASCII-only, so a type written entirely in non-ASCII
    characters is reduced to the ``"RELATED"`` fallback.
    """
    if rel_type is None:
        return _DEFAULT_REL_TYPE
    if not isinstance(rel_type, str):
        rel_type = str(rel_type)

    sanitized = _INVALID_TYPE_CHARS.sub("", rel_type)
    if not sanitized:
        return _DEFAULT_REL_TYPE
    if sanitized[0].isdigit():
        return "REL_" + sanitized
    return sanitized


def extract_start_end(rel: dict):
    """Return the ``(start, end)`` endpoints of a relationship record.

    The key pairs ``start``/``end``, ``source``/``target`` and ``from``/``to``
    are tried in that order; the first pair with both values present wins.
    Returns ``(None, None)`` when no complete pair exists or ``rel`` is not a
    dict.
    """
    if not isinstance(rel, dict):
        return None, None
    for start_key, end_key in _ENDPOINT_KEYS:
        start = rel.get(start_key)
        end = rel.get(end_key)
        if start is not None and end is not None:
            return start, end
    return None, None


def _nested_relationships(container: dict) -> list:
    """Return ``container["relationships"]`` if it is a list, else ``[]``."""
    nested = container.get("relationships")
    return nested if isinstance(nested, list) else []


def load_relationships_from_file(filepath):
    """Load the raw relationship records from one JSON file.

    Accepted layouts:

    * a list whose dict items are either relationship records or wrappers
      holding a ``"relationships"`` list (non-dict items are ignored);
    * a dict holding a ``"relationships"`` list.

    Any other layout yields an empty list. The file is read as UTF-8 with an
    optional BOM. Raises ``OSError`` / ``json.JSONDecodeError`` on unreadable
    or malformed files.
    """
    with open(filepath, "r", encoding="utf-8-sig") as f:
        data = json.load(f)

    if isinstance(data, dict):
        return list(_nested_relationships(data))
    if not isinstance(data, list):
        return []

    relationships = []
    for item in data:
        if not isinstance(item, dict):
            continue
        if "relationships" in item:
            relationships.extend(_nested_relationships(item))
        else:
            relationships.append(item)
    return relationships


def _coerce_node_id(value):
    """Convert ``value`` to an int node id, or return ``None`` if impossible."""
    try:
        # Exact path first: going through float would corrupt ids above 2**53.
        return int(value)
    except (TypeError, ValueError, OverflowError):
        pass
    try:
        # Fallback for strings such as "12.0" or "1e3".
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return None


def process_relationships(relationships):
    """Clean and validate raw relationship records.

    Records that are not dicts, lack a start/end pair, or whose endpoints
    cannot be converted to integers are dropped. Each surviving record becomes
    ``{"start": int, "end": int, "type": str, "props": dict}``.
    """
    valid_rels = []
    for rel in relationships:
        if not isinstance(rel, dict):
            continue

        raw_start, raw_end = extract_start_end(rel)
        if raw_start is None or raw_end is None:
            continue

        start_id = _coerce_node_id(raw_start)
        end_id = _coerce_node_id(raw_end)
        if start_id is None or end_id is None:
            continue

        props = rel.get("properties")
        if not isinstance(props, dict):
            # `SET rel += r.props` needs a map; anything else is discarded.
            props = {}

        valid_rels.append({
            "start": start_id,
            "end": end_id,
            "type": sanitize_relationship_type(rel.get("type", _DEFAULT_REL_TYPE)),
            "props": props,
        })
    return valid_rels


def import_relationships_in_batches(tx, rels, batch_size):
    """MERGE the given relationships through ``tx`` in slices of ``batch_size``.

    Within each slice the records are grouped by relationship type, because
    the type has to be embedded in the query text and cannot be a parameter.
    Endpoints are matched on the ``nodeId`` property of any node.

    Returns the sum of ``count(rel)`` over all queries.

    NOTE: ``count(rel)`` counts every merged row, including relationships that
    already existed, so the figure is "merged", not strictly "newly created".
    NOTE: all slices run on the same ``tx``; slicing limits the size of each
    query, not the size of the transaction.
    """
    total = len(rels)
    created_total = 0

    for offset in range(0, total, batch_size):
        rel_groups = {}
        for rel in rels[offset:offset + batch_size]:
            # Re-sanitize at the point of interpolation so the query text is
            # safe even when a caller passes unprocessed records.
            safe_type = sanitize_relationship_type(rel["type"])
            rel_groups.setdefault(safe_type, []).append({
                "start": rel["start"],
                "end": rel["end"],
                "props": rel["props"],
            })

        created_this_batch = 0
        for rel_type, group in rel_groups.items():
            cypher = (
                "UNWIND $rels AS r "
                "MATCH (a {nodeId: r.start}) "
                "MATCH (b {nodeId: r.end}) "
                f"MERGE (a)-[rel:`{rel_type}`]->(b) "
                "SET rel += r.props "
                "RETURN count(rel) AS c"
            )
            record = tx.run(cypher, rels=group).single()
            created_this_batch += record["c"]

        created_total += created_this_batch
        print(f" ➤ 本批创建关系: {created_this_batch} 条")

    return created_total


def main():
    """Import every ``relations_*.json`` file in RELATIONSHIP_FOLDER.

    Files are processed in lexicographic name order. A failure in one file is
    reported and the remaining files are still processed.
    """
    if GraphDatabase is None:
        raise ImportError("The 'neo4j' package is required: pip install neo4j")

    try:
        entries = os.listdir(RELATIONSHIP_FOLDER)
    except OSError as e:
        print(f"❌ 无法读取文件夹 {RELATIONSHIP_FOLDER}: {e}")
        return

    # Lexicographic order is enough for zero-padded names (001, 002, ...).
    json_files = sorted(
        name for name in entries
        if name.startswith("relations_") and name.endswith(".json")
    )
    if not json_files:
        print("❌ 文件夹中没有找到 relations_*.json 文件")
        return

    total_global_created = 0
    total_global_processed = 0
    print(f"📁 找到 {len(json_files)} 个关系文件,开始逐个导入...\n")

    # The context manager closes the driver on every exit path.
    with GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD)) as driver:
        for idx, filename in enumerate(json_files, 1):
            filepath = os.path.join(RELATIONSHIP_FOLDER, filename)
            print(f"\n📄 [{idx}/{len(json_files)}] 正在处理: {filename}")
            try:
                raw_rels = load_relationships_from_file(filepath)
                print(f" ➤ 原始关系数: {len(raw_rels)}")

                valid_rels = process_relationships(raw_rels)
                print(f" ➤ 有效关系数: {len(valid_rels)}")
                if not valid_rels:
                    print(" ⚠️ 跳过:无有效关系")
                    continue

                with driver.session() as session:
                    created = session.execute_write(
                        import_relationships_in_batches, valid_rels, BATCH_SIZE
                    )

                total_global_created += created
                total_global_processed += len(valid_rels)
                print(f" ✅ 文件 {filename} 导入完成,创建 {created} 条关系")
            except Exception as e:
                # Top-level boundary: report and move on to the next file.
                print(f" ❌ 处理 {filename} 时出错: {e}")
                continue

    print("\n" + "=" * 60)
    print("🎉 全部导入完成!")
    print(f"📊 总共处理有效关系: {total_global_processed}")
    print(f"✅ 总共成功创建关系: {total_global_created}")


if __name__ == "__main__":
    main()