You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
36 lines
1.0 KiB
36 lines
1.0 KiB
|
3 months ago
|
# batch_test.py
"""Batch-align terms from ``input_terms.txt`` and export the matches to CSV.

The script:

1. builds a ``CMeKGAligner`` pointing at a local Bolt endpoint,
2. reads one term per line from ``input_terms.txt`` (blank lines skipped),
3. calls ``find_entities_batch`` once for the whole list,
4. prints the outcome for the first 10 terms as a sample, and
5. writes one row per term to ``batch_alignment_result.csv``.
"""
import csv

from cmekg_aligner import CMeKGAligner

# Number of terms echoed to the console as a quick sanity check.
PREVIEW_COUNT = 10

# NOTE(review): the credentials below are placeholders; replace them (or load
# them from the environment) before running against a real database.
aligner = CMeKGAligner(
    uri="bolt://localhost:7687",
    user="neo4j",
    password="your_password",
)
# NOTE(review): if CMeKGAligner holds an open database connection it probably
# needs an explicit close at the end of the script — confirm against its API.

# Simulates tens of thousands of records; the terms are read from a file,
# one per line, with surrounding whitespace removed and empty lines dropped.
with open("input_terms.txt", "r", encoding="utf-8") as f:
    stripped_lines = (line.strip() for line in f)
    terms = [term for term in stripped_lines if term]

print(f"🔍 开始批量对齐 {len(terms)} 条实体...")

# Assumes the result can be indexed by every input term, with a falsy value
# for terms that did not match — TODO confirm against find_entities_batch.
results = aligner.find_entities_batch(terms)

# Print the results, limited to the first few terms as an example.
for term in terms[:PREVIEW_COUNT]:
    res = results[term]
    if res:
        print(f"✅ '{term}' → '{res[0]}', {res[1]}")
    else:
        print(f"❌ '{term}' → 未匹配")

# Optional: save every term to CSV. Unmatched terms keep their row, with the
# two result columns left empty, so the output lines up with the input.
with open("batch_alignment_result.csv", "w", encoding="utf-8", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["原始词", "标准名", "类型"])
    for term in terms:
        res = results[term]
        matched_fields = [res[0], res[1]] if res else ["", ""]
        writer.writerow([term, *matched_fields])