4 changed files with 234 additions and 168 deletions
@ -0,0 +1,188 @@ |
|||||
|
package com.main.woka.Common.util; |
||||
|
|
||||
|
import edu.stanford.nlp.ling.CoreAnnotations; |
||||
|
import edu.stanford.nlp.ling.CoreLabel; |
||||
|
import edu.stanford.nlp.pipeline.Annotation; |
||||
|
import edu.stanford.nlp.pipeline.StanfordCoreNLP; |
||||
|
import edu.stanford.nlp.util.StringUtils; |
||||
|
import org.springframework.stereotype.Component; |
||||
|
|
||||
|
import java.util.*; |
||||
|
|
||||
|
|
||||
|
@Component |
||||
|
public class RelationUtil1 { |
||||
|
//标题分词
|
||||
|
public String analyzeRelationship(String title1, String title2) { |
||||
|
//载入properties 文件
|
||||
|
String[] args = new String[]{"-props", "CoreNLP-chinese.properties"}; |
||||
|
Properties properties = StringUtils.argsToProperties(args); |
||||
|
properties.setProperty("annotators", "tokenize, ssplit, pos"); |
||||
|
properties.setProperty("tokenize.language", "zh"); // 设置为中文
|
||||
|
|
||||
|
StanfordCoreNLP pipline = new StanfordCoreNLP(properties); |
||||
|
|
||||
|
// 分析第一个短语 "低温"
|
||||
|
String phrase1 = title1; |
||||
|
List<Map> ll = analyzePhrase(pipline, phrase1); |
||||
|
|
||||
|
|
||||
|
//循环第一个短语的分词
|
||||
|
String useTitle = ""; |
||||
|
|
||||
|
for(int i=0;i<ll.size();i++){ |
||||
|
if (ll.get(i).get("pos").equals("NN") || ll.get(i).get("pos").equals("JJ")){ |
||||
|
useTitle = useTitle+ll.get(i).get("text"); |
||||
|
}else { |
||||
|
break; |
||||
|
} |
||||
|
|
||||
|
} |
||||
|
|
||||
|
// 分析第二个短语 "低温的影响"
|
||||
|
String phrase2 = title2; |
||||
|
String result = analyzePhrase1(pipline, phrase2, useTitle); |
||||
|
return result; |
||||
|
} |
||||
|
|
||||
|
public List<Map> analyzePhrase(StanfordCoreNLP pipeline, String phrase) { |
||||
|
List<Map> list = new ArrayList<>(); |
||||
|
Annotation document = new Annotation(phrase); |
||||
|
pipeline.annotate(document); |
||||
|
|
||||
|
List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class); |
||||
|
for (CoreLabel token : tokens) { |
||||
|
String word = token.get(CoreAnnotations.TextAnnotation.class); |
||||
|
String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class); |
||||
|
System.out.println(word + ": " + pos); |
||||
|
Map mm = new HashMap(); |
||||
|
mm.put("text",word); |
||||
|
mm.put("pos",pos); |
||||
|
list.add(mm); |
||||
|
} |
||||
|
return list; |
||||
|
|
||||
|
} |
||||
|
|
||||
|
public String analyzePhrase1(StanfordCoreNLP pipeline, String phrase,String useTitle) { |
||||
|
Annotation document = new Annotation(phrase); |
||||
|
pipeline.annotate(document); |
||||
|
String title= ""; |
||||
|
String result =""; |
||||
|
Integer index = 0; |
||||
|
System.out.println(useTitle); |
||||
|
|
||||
|
List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class); |
||||
|
for(int i=0;i<tokens.size();i++){ |
||||
|
CoreLabel token = tokens.get(i); |
||||
|
String word = token.get(CoreAnnotations.TextAnnotation.class); |
||||
|
String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class); |
||||
|
title= title + word; |
||||
|
if(useTitle.equals(title)){ |
||||
|
//有相同词汇有关系
|
||||
|
index = i+1; |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
System.out.println(index); |
||||
|
if (index==0){ |
||||
|
return "属于"; |
||||
|
}else { |
||||
|
for(int j=index;j<tokens.size();j++){ |
||||
|
CoreLabel token1 = tokens.get(j); |
||||
|
String word1 = token1.get(CoreAnnotations.TextAnnotation.class); |
||||
|
String pos1 = token1.get(CoreAnnotations.PartOfSpeechAnnotation.class); |
||||
|
if(j==index){ |
||||
|
System.out.println(pos1); |
||||
|
if(word1.equals("的")){ |
||||
|
System.out.println("aaaaaaaaaaaaaaaa"); |
||||
|
}else { |
||||
|
if(pos1.equals("PU")){ |
||||
|
result = "属于"; |
||||
|
break; |
||||
|
} |
||||
|
if (!pos1.equals("CC") && !pos1.equals("DEG")){ |
||||
|
result= result + word1; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
|
||||
|
}else { |
||||
|
result= result + word1; |
||||
|
} |
||||
|
|
||||
|
} |
||||
|
if(result.equals("")){ |
||||
|
result = "属于"; |
||||
|
} |
||||
|
|
||||
|
return result; |
||||
|
} |
||||
|
|
||||
|
|
||||
|
|
||||
|
} |
||||
|
|
||||
|
|
||||
|
|
||||
|
//将一句话分成多个词语
|
||||
|
public List<String> analyzeRelationshipLong(String title1) { |
||||
|
List<String> list = new ArrayList<>(); |
||||
|
String[] args = new String[]{"-props", "CoreNLP-chinese.properties"}; |
||||
|
Properties properties = StringUtils.argsToProperties(args); |
||||
|
properties.setProperty("annotators", "tokenize, ssplit, pos"); |
||||
|
properties.setProperty("tokenize.language", "zh"); // 设置为中文
|
||||
|
|
||||
|
StanfordCoreNLP pipline = new StanfordCoreNLP(properties); |
||||
|
|
||||
|
// 分析第一个短语 "低温"
|
||||
|
String phrase1 = title1; |
||||
|
List<Map> ll = analyzePhrase(pipline, phrase1); |
||||
|
|
||||
|
for(int i=0;i<ll.size();i++){ |
||||
|
if (ll.get(i).get("pos").equals("NN")){ |
||||
|
list.add(String.valueOf(ll.get(i).get("text"))); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return list; |
||||
|
} |
||||
|
|
||||
|
|
||||
|
public List<String> analyzeRelationshipLong1(String title1) { |
||||
|
List<String> list = new ArrayList<>(); |
||||
|
String[] args = new String[]{"-props", "CoreNLP-chinese.properties"}; |
||||
|
Properties properties = StringUtils.argsToProperties(args); |
||||
|
properties.setProperty("annotators", "tokenize, ssplit, pos"); |
||||
|
properties.setProperty("tokenize.language", "zh"); // 设置为中文
|
||||
|
|
||||
|
StanfordCoreNLP pipline = new StanfordCoreNLP(properties); |
||||
|
|
||||
|
// 分析第一个短语 "低温"
|
||||
|
String phrase1 = title1; |
||||
|
List<Map> ll = analyzePhrase(pipline, phrase1); |
||||
|
|
||||
|
for(int i=0;i<ll.size();i++){ |
||||
|
if (ll.get(i).get("pos").equals("NN")){ |
||||
|
if(String.valueOf(ll.get(i).get("text")).charAt(0)!='<' && String.valueOf(ll.get(i).get("text")).charAt(0)!='&' && String.valueOf(ll.get(i).get("text")).charAt(String.valueOf(ll.get(i).get("text")).length()-1)!='t'){ |
||||
|
int hh = 0; |
||||
|
for(int b=0;b<list.size();b++){ |
||||
|
if(list.get(b).equals(String.valueOf(ll.get(i).get("text")))){ |
||||
|
hh = 1; |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
if (hh == 0){ |
||||
|
list.add(String.valueOf(ll.get(i).get("text"))); |
||||
|
} |
||||
|
|
||||
|
} |
||||
|
|
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return list; |
||||
|
} |
||||
|
|
||||
|
} |
||||
Loading…
Reference in new issue