|
|
|
@ -61,6 +61,7 @@ public class PointService { |
|
|
|
map.put("title",articles.get(i).getName()); |
|
|
|
map.put("authors",formatAuthorsWithAgencies(authors,agencies)); |
|
|
|
map.put("urls",""); |
|
|
|
// map.put("source",articles.get(i).getSourceName());
|
|
|
|
if (articles.get(i).getKeywords()==null){ |
|
|
|
map.put("keywords",""); |
|
|
|
}else{ |
|
|
|
@ -212,9 +213,11 @@ public class PointService { |
|
|
|
List<String> agencies = new ArrayList<>(new LinkedHashSet<>(parsed.get("agency"))); |
|
|
|
List<String> keywords = new ArrayList<>(new LinkedHashSet<>(parsed.get("keyword"))); |
|
|
|
String title = record.get("title"); |
|
|
|
String source = record.get("source"); |
|
|
|
String urls = record.get("urls"); |
|
|
|
String guanjianzi = record.get("keywords"); |
|
|
|
|
|
|
|
|
|
|
|
// 用于存储每个三元组对应的关系 ID
|
|
|
|
Map<String, List<Integer>> tripletToIds = new HashMap<>(); |
|
|
|
|
|
|
|
@ -247,6 +250,8 @@ public class PointService { |
|
|
|
params.put("title", title); |
|
|
|
params.put("url", urls); |
|
|
|
params.put("guanjianzi", guanjianzi); |
|
|
|
params.put("source", source); |
|
|
|
params.put("source_data", getYear(source)); |
|
|
|
zhyPointMapper.insertPointReship(params); |
|
|
|
Integer insertedId = Integer.valueOf(String.valueOf(params.get("id"))); |
|
|
|
rel.relationshipId = insertedId; |
|
|
|
@ -254,6 +259,7 @@ public class PointService { |
|
|
|
// 添加到 tripletToIds
|
|
|
|
tripletToIds.computeIfAbsent(tripletKey, k -> new ArrayList<>()).add(insertedId); |
|
|
|
} catch (Exception e) { |
|
|
|
System.out.println(e.getMessage()); |
|
|
|
System.out.println("并发导致重复数据"); |
|
|
|
} |
|
|
|
} |
|
|
|
@ -286,12 +292,15 @@ public class PointService { |
|
|
|
params.put("title", title); |
|
|
|
params.put("url", urls); |
|
|
|
params.put("guanjianzi", guanjianzi); |
|
|
|
params.put("source", source); |
|
|
|
params.put("source_data", getYear(source)); |
|
|
|
zhyPointMapper.insertPointReship(params); |
|
|
|
Integer insertedId = Integer.valueOf(String.valueOf(params.get("id"))); |
|
|
|
rel.relationshipId = insertedId; |
|
|
|
|
|
|
|
tripletToIds.computeIfAbsent(tripletKey, k -> new ArrayList<>()).add(insertedId); |
|
|
|
} catch (Exception e) { |
|
|
|
System.out.println(e.getMessage()); |
|
|
|
System.out.println("并发导致重复数据"); |
|
|
|
} |
|
|
|
} |
|
|
|
@ -324,12 +333,14 @@ public class PointService { |
|
|
|
params.put("title", title); |
|
|
|
params.put("url", urls); |
|
|
|
params.put("guanjianzi", guanjianzi); |
|
|
|
params.put("source", source); |
|
|
|
params.put("source_data", getYear(source)); |
|
|
|
zhyPointMapper.insertPointReship(params); |
|
|
|
Integer insertedId = Integer.valueOf(String.valueOf(params.get("id"))); |
|
|
|
rel.relationshipId = insertedId; |
|
|
|
|
|
|
|
tripletToIds.computeIfAbsent(tripletKey, k -> new ArrayList<>()).add(insertedId); |
|
|
|
} catch (Exception e) { |
|
|
|
System.out.println(e.getMessage()); |
|
|
|
System.out.println("并发导致重复数据"); |
|
|
|
} |
|
|
|
} |
|
|
|
@ -371,6 +382,22 @@ public class PointService { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
public Integer getYear(String source){ |
|
|
|
// 定义匹配4位连续数字的正则表达式
|
|
|
|
Pattern pattern = Pattern.compile("\\d{4}"); |
|
|
|
Matcher matcher = pattern.matcher(source); |
|
|
|
|
|
|
|
String year = null; |
|
|
|
if (matcher.find()) { |
|
|
|
year = matcher.group(); // 提取第一个匹配的4位数字
|
|
|
|
} |
|
|
|
// 输出结果
|
|
|
|
if (year != null) { |
|
|
|
return Integer.valueOf(year); |
|
|
|
} else { |
|
|
|
return null; |
|
|
|
} |
|
|
|
} |
|
|
|
/** |
|
|
|
* 生成两个列表的笛卡尔积,并标记关系类型 |
|
|
|
*/ |
|
|
|
@ -559,7 +586,7 @@ public class PointService { |
|
|
|
String key = headers[i].trim(); |
|
|
|
String value = row[i]; |
|
|
|
|
|
|
|
if (Arrays.asList("main_keyword", "sub_keyword", "title", "authors", "keywords", "abstract", "url", "urls").contains(key)) { |
|
|
|
if (Arrays.asList("main_keyword", "sub_keyword", "title", "authors", "keywords", "abstract", "url", "urls","source").contains(key)) { |
|
|
|
rowMap.put(key, value); |
|
|
|
} |
|
|
|
} |
|
|
|
|