|
|
|
@ -7,12 +7,17 @@ import com.ruoyi.api.mapper.ZhyPointMapper; |
|
|
|
import com.ruoyi.api.service.impl.PointService; |
|
|
|
import com.ruoyi.common.core.controller.BaseController; |
|
|
|
import com.ruoyi.common.utils.Neo4jUtil; |
|
|
|
import com.ruoyi.system.domain.ZhyArticle; |
|
|
|
import org.apache.ibatis.annotations.Param; |
|
|
|
import org.springframework.beans.factory.annotation.Autowired; |
|
|
|
import org.springframework.web.bind.annotation.*; |
|
|
|
|
|
|
|
import java.io.BufferedReader; |
|
|
|
import java.io.FileInputStream; |
|
|
|
import java.io.IOException; |
|
|
|
import java.io.InputStreamReader; |
|
|
|
import java.lang.reflect.Type; |
|
|
|
import java.nio.charset.StandardCharsets; |
|
|
|
import java.util.*; |
|
|
|
import java.util.regex.Matcher; |
|
|
|
import java.util.regex.Pattern; |
|
|
|
@ -36,6 +41,183 @@ public class TestXiaoTuPuController extends BaseController { |
|
|
|
private static final Gson gson = new GsonBuilder().create(); |
|
|
|
private static final Type LIST_MAP_TYPE = new TypeToken<List<Map<String, Object>>>(){}.getType(); |
|
|
|
|
|
|
|
@GetMapping("test10") |
|
|
|
public void test10() throws IOException { |
|
|
|
System.out.println(10); |
|
|
|
List<ZhyArticle> articles = new ArrayList<>(); |
|
|
|
|
|
|
|
System.out.println("有"); |
|
|
|
try (BufferedReader reader = new BufferedReader( |
|
|
|
new InputStreamReader(new FileInputStream("坦克.txt"), StandardCharsets.UTF_8))) { |
|
|
|
String line; |
|
|
|
ZhyArticle zhyArticle = null; |
|
|
|
String currentField = null; // 当前字段:"authors", "agencies", "summary"
|
|
|
|
|
|
|
|
while ((line = reader.readLine()) != null) { |
|
|
|
line = line.trim(); |
|
|
|
if (line.isEmpty()) continue; |
|
|
|
|
|
|
|
// ✅ 步骤1:检查前8个字符内是否有冒号(: 或 :)
|
|
|
|
int sepIndex = -1; |
|
|
|
char separator = '\0'; |
|
|
|
String prefix = line.substring(0, Math.min(8, line.length())); // 前最多8个字符
|
|
|
|
|
|
|
|
if (prefix.contains(":")) { |
|
|
|
sepIndex = prefix.indexOf(":"); |
|
|
|
separator = ':'; |
|
|
|
} else if (prefix.contains(":")) { |
|
|
|
sepIndex = prefix.indexOf(":"); |
|
|
|
separator = ':'; |
|
|
|
} |
|
|
|
|
|
|
|
// 判断是否是“字段行”(前8字符内有冒号)
|
|
|
|
if (sepIndex != -1) { |
|
|
|
String fieldKey = line.substring(0, sepIndex).trim(); |
|
|
|
String value = line.substring(sepIndex + (separator == ':' ? 1 : 1)).trim(); |
|
|
|
System.out.println(fieldKey); |
|
|
|
System.out.println(value); |
|
|
|
|
|
|
|
|
|
|
|
// ✅ 只处理你关心的四个字段
|
|
|
|
if ("题名".equals(fieldKey)) { |
|
|
|
if (zhyArticle != null) { |
|
|
|
articles.add(zhyArticle); |
|
|
|
} |
|
|
|
zhyArticle = new ZhyArticle(); |
|
|
|
zhyArticle.setName(value); |
|
|
|
currentField = null; |
|
|
|
} else if ("作者".equals(fieldKey)) { |
|
|
|
if (zhyArticle != null) { |
|
|
|
zhyArticle.setAuthors(value); |
|
|
|
currentField = "authors"; |
|
|
|
} |
|
|
|
} else if ("单位".equals(fieldKey)) { |
|
|
|
if (zhyArticle != null) { |
|
|
|
zhyArticle.setAgencies(value); |
|
|
|
currentField = "agencies"; |
|
|
|
} |
|
|
|
}else if ("学位授予单位".equals(fieldKey)) { |
|
|
|
if (zhyArticle != null) { |
|
|
|
zhyArticle.setAgencies(value); |
|
|
|
currentField = "agencies"; |
|
|
|
} |
|
|
|
} else if ("摘要".equals(fieldKey)) { |
|
|
|
if (zhyArticle != null) { |
|
|
|
zhyArticle.setSummary(value); |
|
|
|
currentField = "summary"; |
|
|
|
} |
|
|
|
} else if ("关键词".equals(fieldKey)) { |
|
|
|
if (zhyArticle != null) { |
|
|
|
zhyArticle.setKeywords(value); |
|
|
|
currentField = "keywords"; |
|
|
|
} |
|
|
|
} |
|
|
|
// ✅ 如果字段不在白名单内(如“中文刊名”),则:
|
|
|
|
// - 不处理
|
|
|
|
// - 不追加
|
|
|
|
// - 重置 currentField(防止后续行追加到错误字段)
|
|
|
|
else { |
|
|
|
currentField = null; // 关键:断开与之前字段的联系
|
|
|
|
} |
|
|
|
} |
|
|
|
// ✅ 普通行:不是字段行,追加到 currentField
|
|
|
|
else { |
|
|
|
if (zhyArticle != null && currentField != null) { |
|
|
|
if ("authors".equals(currentField)) { |
|
|
|
String existing = zhyArticle.getAuthors(); |
|
|
|
zhyArticle.setAuthors((existing != null ? existing : "") + "\n" + line); |
|
|
|
} else if ("agencies".equals(currentField)) { |
|
|
|
String existing = zhyArticle.getAgencies(); |
|
|
|
zhyArticle.setAgencies((existing != null ? existing : "") + "\n" + line); |
|
|
|
} else if ("summary".equals(currentField)) { |
|
|
|
String existing = zhyArticle.getSummary(); |
|
|
|
zhyArticle.setSummary((existing != null ? existing : "") + "\n" + line); |
|
|
|
} |
|
|
|
} |
|
|
|
// 如果 currentField == null,说明前面是未知字段,跳过
|
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// 添加最后一条记录
|
|
|
|
if (zhyArticle != null) { |
|
|
|
articles.add(zhyArticle); |
|
|
|
} |
|
|
|
|
|
|
|
} catch (IOException e) { |
|
|
|
System.err.println("读取文件时发生错误:" + e.getMessage()); |
|
|
|
e.printStackTrace(); |
|
|
|
} |
|
|
|
|
|
|
|
System.out.println(articles); |
|
|
|
pointService.buildPoint(articles,"坦克","维普"); |
|
|
|
//
|
|
|
|
//
|
|
|
|
// List<Map<String, String>> dataList = new ArrayList<>();
|
|
|
|
// for (int i = 0; i < articles.size(); i++) {
|
|
|
|
//
|
|
|
|
// String authorsstr = articles.get(i).getAuthors();
|
|
|
|
//
|
|
|
|
// if (authorsstr != null && !authorsstr.isEmpty()) {
|
|
|
|
// // 判断字符串长度大于0,且最后一个字符是 '等'
|
|
|
|
// if (authorsstr.charAt(authorsstr.length() - 1) == '等') {
|
|
|
|
// authorsstr = authorsstr.substring(0, authorsstr.length() - 1); // 去掉最后一个字符
|
|
|
|
// }
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// // 按中文分号或英文逗号分割作者
|
|
|
|
// String[] authors = authorsstr.split("[;,;,]");
|
|
|
|
// String[] agencies = articles.get(i).getAgencies().split(",");
|
|
|
|
// if (agencies.length>1){
|
|
|
|
// for (int j = 0; j < authors.length; j++) {
|
|
|
|
// authors[j] = authors[j]+generateSequence(agencies.length);
|
|
|
|
// }
|
|
|
|
// }
|
|
|
|
// Map<String, String> map = new HashMap<>();
|
|
|
|
// map.put("main_keyword","坦克");
|
|
|
|
// map.put("title",articles.get(i).getName());
|
|
|
|
// map.put("authors",formatAuthorsWithAgencies(authors,agencies));
|
|
|
|
// map.put("urls","");
|
|
|
|
// if (articles.get(i).getKeywords()==null){
|
|
|
|
// map.put("keywords","");
|
|
|
|
// }else{
|
|
|
|
// map.put("keywords",articles.get(i).getKeywords().replace(",", ";"));
|
|
|
|
// }
|
|
|
|
// dataList.add(map);
|
|
|
|
// }
|
|
|
|
// System.out.println(dataList);
|
|
|
|
|
|
|
|
} |
|
|
|
public static String generateSequence(int n) { |
|
|
|
if (n <= 0) return ""; |
|
|
|
StringBuilder sb = new StringBuilder(); |
|
|
|
for (int i = 1; i <= n; i++) { |
|
|
|
sb.append(i); |
|
|
|
if (i < n) { |
|
|
|
sb.append(","); |
|
|
|
} |
|
|
|
} |
|
|
|
return sb.toString(); |
|
|
|
} |
|
|
|
public static String formatAuthorsWithAgencies(String[] authors, String[] agencies) { |
|
|
|
// 处理作者:去除空值或空字符串,并用中文逗号连接
|
|
|
|
String authorPart = Arrays.stream(authors) |
|
|
|
.map(String::trim) |
|
|
|
.filter(s -> !s.isEmpty()) |
|
|
|
.collect(Collectors.joining(",")); |
|
|
|
|
|
|
|
// 处理机构:去除空值或空字符串,并用中文分号连接
|
|
|
|
String agencyPart = Arrays.stream(agencies) |
|
|
|
.map(String::trim) |
|
|
|
.filter(s -> !s.isEmpty()) |
|
|
|
.collect(Collectors.joining(";")); |
|
|
|
|
|
|
|
// 拼接最终结果
|
|
|
|
if (!agencyPart.isEmpty()) { |
|
|
|
return authorPart + "(" + agencyPart + ")"; |
|
|
|
} else { |
|
|
|
return authorPart; |
|
|
|
} |
|
|
|
} |
|
|
|
@GetMapping("test1") |
|
|
|
public void test1(){ |
|
|
|
|
|
|
|
|