You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

36 lines
1.0 KiB

# batch_test.py
import csv

from cmekg_aligner import CMeKGAligner
# Connection settings passed to CMeKGAligner.
# NOTE(review): the password is a placeholder committed in source; replace it
# with real configuration before running against a live database.
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "your_password"

INPUT_PATH = "input_terms.txt"
OUTPUT_PATH = "batch_alignment_result.csv"
PREVIEW_COUNT = 10  # how many results are echoed to the console as a sample


def load_terms(path: str) -> list:
    """Return the non-blank lines of *path*, stripped of surrounding whitespace.

    The file is read as UTF-8. Lines that are empty after stripping are
    dropped; order and duplicates are preserved.
    """
    with open(path, "r", encoding="utf-8") as f:
        stripped = [line.strip() for line in f]
    return [term for term in stripped if term]


def format_match(term, res) -> str:
    """Build the one-line console summary for a single term.

    *res* is the per-term value taken from the batch result. A falsy value
    is reported as unmatched; otherwise ``res[0]`` and ``res[1]`` are shown
    (they are the values written under the "标准名" and "类型" CSV columns).
    """
    if not res:
        return f"'{term}' → 未匹配"
    # The original omitted the arrow here, gluing the term to the match.
    return f"'{term}' → '{res[0]}', {res[1]}"


def save_results_csv(path: str, terms, results) -> None:
    """Write one CSV row per term to *path* (UTF-8), preceded by a header row.

    Each row is ``[term, res[0], res[1]]`` when ``results[term]`` is truthy,
    and ``[term, "", ""]`` otherwise. ``results`` is indexed with
    ``results[term]``, so a term that is absent from it raises whatever
    exception that lookup raises (``KeyError`` for a dict).
    """
    # newline="" lets the csv module control line endings itself.
    with open(path, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["原始词", "标准名", "类型"])
        for term in terms:
            res = results[term]
            row = [term, res[0], res[1]] if res else [term, "", ""]
            writer.writerow(row)


def main() -> None:
    """Align every term in INPUT_PATH, print a sample, and save all results."""
    aligner = CMeKGAligner(
        uri=NEO4J_URI,
        user=NEO4J_USER,
        password=NEO4J_PASSWORD,
    )
    # NOTE(review): the aligner is never closed here; if CMeKGAligner exposes
    # a close()/context-manager API, release it once the batch is finished.

    # Stand-in for a large workload (tens of thousands of terms) read from a file.
    terms = load_terms(INPUT_PATH)
    print(f"🔍 开始批量对齐 {len(terms)} 条实体...")
    results = aligner.find_entities_batch(terms)

    # Print only the first few results as a sample.
    for term in terms[:PREVIEW_COUNT]:
        print(format_match(term, results[term]))

    # Persist the full result set to CSV.
    save_results_csv(OUTPUT_PATH, terms, results)


if __name__ == "__main__":
    main()