|
|
|
@ -27,7 +27,10 @@ public class PointService { |
|
|
|
Neo4jUtil neo4jUtil; |
|
|
|
|
|
|
|
public void buildPoint(String url) throws IOException { |
|
|
|
// String url="D:\\project\\gyx\\tupudata\\thesis\\thesis_results_20250914_201832.csv";
|
|
|
|
List<Map<String, String>> dataList = getCsvLine(url); |
|
|
|
System.out.println("2222222222222222222222"); |
|
|
|
System.out.println(dataList); |
|
|
|
Map<String,Integer> nameToId = new HashMap<>(); |
|
|
|
for (Map<String, String> record : dataList) { |
|
|
|
List<Map<String, String>> relationshipList = new ArrayList<>(); |
|
|
|
@ -87,6 +90,7 @@ public class PointService { |
|
|
|
relType = "相关作者"; |
|
|
|
} else if (authorStr != null && !authorStr.trim().isEmpty() && |
|
|
|
agencyStr != null && !agencyStr.trim().isEmpty()) { |
|
|
|
System.out.println("aaaaaaaaaaaaaaaaaaaaaaaaa"); |
|
|
|
start = authorStr; |
|
|
|
end = agencyStr; |
|
|
|
relType = "相关机构"; |
|
|
|
@ -208,7 +212,7 @@ public class PointService { |
|
|
|
List<String> keywords = new ArrayList<>(); |
|
|
|
|
|
|
|
// 1. 解析 keywords(字段名可根据 CSV 调整)
|
|
|
|
String keywordStr = record.get("searchword1"); |
|
|
|
String keywordStr = record.get("main_keyword"); |
|
|
|
if (keywordStr != null && !keywordStr.trim().isEmpty()) { |
|
|
|
String[] keywordArray = keywordStr.split(";"); // 中文分号分隔
|
|
|
|
for (String kw : keywordArray) { |
|
|
|
@ -276,33 +280,59 @@ public class PointService { |
|
|
|
throw new FileNotFoundException("CSV file not found: " + csvFilePath); |
|
|
|
} |
|
|
|
|
|
|
|
// 2. Read the CSV file (decoded as UTF-8)
|
|
|
|
List<Map<String, String>> dataList = new ArrayList<>(); |
|
|
|
String[] headers = null; |
|
|
|
|
|
|
|
// 定义 CSV 格式
|
|
|
|
CSVFormat format = CSVFormat.DEFAULT |
|
|
|
.withFirstRecordAsHeader() |
|
|
|
.withIgnoreSurroundingSpaces(true) |
|
|
|
.withTrim() |
|
|
|
.withQuote('"') |
|
|
|
.withEscape('\\') |
|
|
|
.withIgnoreEmptyLines(true); |
|
|
|
try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"))) { |
|
|
|
String line; |
|
|
|
int lineNumber = 0; |
|
|
|
|
|
|
|
try (FileInputStream fis = new FileInputStream(file); |
|
|
|
InputStreamReader isr = new InputStreamReader(fis, StandardCharsets.UTF_8); // decode the file as UTF-8
|
|
|
|
CSVParser csvParser = new CSVParser(isr, format)) { |
|
|
|
while ((line = reader.readLine()) != null) { |
|
|
|
lineNumber++; |
|
|
|
|
|
|
|
for (CSVRecord record : csvParser) { |
|
|
|
// 去除 BOM
|
|
|
|
if (lineNumber == 1 && line.startsWith("\uFEFF")) { |
|
|
|
line = line.substring(1); |
|
|
|
} |
|
|
|
|
|
|
|
// 使用更安全的 split:保留引号内内容
|
|
|
|
String[] row = parseCsvLine(line); // 改用自定义解析方法
|
|
|
|
|
|
|
|
// 去除每个字段首尾空白
|
|
|
|
for (int i = 0; i < row.length; i++) { |
|
|
|
row[i] = row[i].trim(); |
|
|
|
} |
|
|
|
|
|
|
|
if (lineNumber == 1) { |
|
|
|
headers = row; |
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
|
// 转为 Map
|
|
|
|
Map<String, String> rowMap = new HashMap<>(); |
|
|
|
for (String header : csvParser.getHeaderNames()) { |
|
|
|
rowMap.put(header, record.get(header)); |
|
|
|
for (int i = 0; i < headers.length && i < row.length; i++) { |
|
|
|
String key = headers[i].trim(); |
|
|
|
String value = row[i]; |
|
|
|
|
|
|
|
if (Arrays.asList("main_keyword", "sub_keyword", "title", "authors", "keywords", "abstract", "url", "urls").contains(key)) { |
|
|
|
rowMap.put(key, value); |
|
|
|
} |
|
|
|
} |
|
|
|
dataList.add(rowMap); |
|
|
|
} |
|
|
|
} catch (IOException e) { |
|
|
|
System.err.println("Error reading CSV file: " + e.getMessage()); |
|
|
|
throw e; |
|
|
|
e.printStackTrace(); |
|
|
|
} |
|
|
|
for (Map<String, String> data : dataList) { |
|
|
|
String mainKeyword = data.get("main_keyword"); |
|
|
|
Pattern pattern = Pattern.compile("'([^']*)'"); |
|
|
|
Matcher matcher = pattern.matcher(mainKeyword); |
|
|
|
while (matcher.find()) { |
|
|
|
data.put("main_keyword", matcher.group(1)); |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
return dataList; |
|
|
|
} |
|
|
|
private static String[] parseCsvLine(String line) { |
|
|
|
|