Commit 0f3f14c8 by 侯昆

1

1 parent 4c01ba9a
package com.dookay.cihai.core;
import com.dookay.coral.common.core.CoralCommonCoreMarker;
import org.mybatis.spring.annotation.MapperScan;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
......@@ -11,7 +12,7 @@ import org.springframework.boot.autoconfigure.SpringBootApplication;
CoralCommonCoreMarker.class,
CiHaiCoreApplication.class
})
@MapperScan(basePackageClasses = CiHaiCoreApplication.class)
public class CiHaiCoreApplication {
public static void main(String[] args) {
......
......@@ -53,6 +53,7 @@ public final class AipUtilBean {
private static final ConcurrentHashMap<String, Double> SCORE_MAP = new ConcurrentHashMap<>();
private static final String SCORE_KEY_PREFIX = "WORD_SCORE:";
private static final double CRITICAL_VALUE = 0.4D;
private static final double RELATE_CRITICAL_VALUE = 0.6D;
/**
* 内部错误
......@@ -81,7 +82,8 @@ public final class AipUtilBean {
// 首先尝试词法分析
List<LexerItem> lexerItems = getLexerItems(queryString);
List<LexerItem> filteredLexer = lexerItems.stream().filter(
l -> l.getPos().equals(LexerPosConst.NT) ||
l -> LexerNeConst.inThis(l.getNe()) ||
l.getPos().equals(LexerPosConst.NT) ||
l.getPos().equals(LexerPosConst.NR) ||
l.getPos().equals(LexerPosConst.NZ) ||
l.getPos().equals(LexerPosConst.NS))
......@@ -192,9 +194,34 @@ public final class AipUtilBean {
return map;
}
// public List<> generateWordsMap(List<String> words) {
//
// }
/**
 * Builds the relations between the given words.
 *
 * <p>Every ordered pair of non-equal words is scored with {@code doSimnet}; a pair is kept
 * only when its score is strictly greater than {@code RELATE_CRITICAL_VALUE}. Because both
 * (a, b) and (b, a) are evaluated, a related pair can appear once in each direction.
 *
 * <p>The inner scan over {@code words} runs on a parallel stream. Results are gathered with
 * a collector rather than appended to a shared {@link ArrayList} from the worker threads,
 * since {@code ArrayList} is not safe for concurrent modification (lost elements,
 * {@code null} slots or {@code ArrayIndexOutOfBoundsException}).
 *
 * @param words the words to relate to each other; {@code null} or empty yields an empty list
 * @return a new mutable list of the relations that passed the threshold, grouped by first
 *         word in the iteration order of {@code words}
 * @author houkun
 * @date 2017/12/7
 */
public List<WordRelation> generateWordsMap(List<String> words) {
    List<WordRelation> wordRelations = new ArrayList<>();
    if (words == null || words.isEmpty()) {
        return wordRelations;
    }
    for (String word1 : words) {
        // Each parallel pipeline produces its own list; merging happens on the calling
        // thread only, so no shared mutable state is touched by the workers.
        List<WordRelation> related = words.parallelStream()
                .filter(word -> !word.equals(word1))
                .map(word -> {
                    WordRelation relation = new WordRelation();
                    relation.setWord1(word1);
                    relation.setWord2(word);
                    relation.setScore(doSimnet(word1, word));
                    return relation;
                })
                .filter(relation -> relation.getScore() > RELATE_CRITICAL_VALUE)
                // Fully qualified: the import block of this file is not guaranteed to
                // contain java.util.stream.Collectors.
                .collect(java.util.stream.Collectors.toList());
        wordRelations.addAll(related);
    }
    return wordRelations;
}
/**
......
package com.dookay.cihai.core.aip;
/*****************************************
* *
* @dookay.com Internet make it happen *
* ----------- ----------------------- *
* dddd ddddd Internet make it happen *
* o o o Internet make it happen *
* k k k Internet make it happen *
* a a a Internet make it happen *
* yyyy yyyyy Internet make it happen *
* ----------- ----------------------- *
* @dookay.com Internet make it happen *
* *
****************************************/
import lombok.Data;
/**
 * A relation between two words.
 *
 * <p>Plain data holder; accessors are generated by Lombok's {@code @Data}.
 *
 * @author houkun
 * @date 2017/12/7
 */
@Data
public class WordRelation {
// First word of the pair.
private String word1;
// Second word of the pair (the word compared against word1).
private String word2;
// Score of the pair; presumably the similarity value returned by the scoring call — confirm against the producer.
private double score;
}
......@@ -16,6 +16,7 @@ package com.dookay.cihai.core;
import com.alibaba.fastjson.JSON;
import com.dookay.cihai.core.aip.AipUtilBean;
import com.dookay.cihai.core.aip.WordRelation;
import org.apache.commons.lang3.tuple.Pair;
import org.junit.Assert;
import org.junit.Test;
......@@ -73,12 +74,15 @@ public class BaiduTest extends CihaiCoreApplicationTests {
String text = FileCopyUtils.copyToString(reader);
List<String> list = aipUtilBean.extractKeyWords("中国共产党第十九次全国代表大会", text, 15);
System.out.println(JSON.toJSONString(list));
Map<Pair<String, String>, Double> map = aipUtilBean.calcKeywordsRelated("中国共产党第十九次全国代表大会", list);
System.out.println(map);
for (String s : list) {
Map<Pair<String, String>, Double> map1 = aipUtilBean.calcKeywordsRelated(s, list);
System.out.println(map1);
}
// Map<Pair<String, String>, Double> map = aipUtilBean.calcKeywordsRelated("中国共产党第十九次全国代表大会", list);
// System.out.println(map);
// for (String s : list) {
// Map<Pair<String, String>, Double> map1 = aipUtilBean.calcKeywordsRelated(s, list);
// System.out.println(map1);
// }
List<WordRelation> wordRelations = aipUtilBean.generateWordsMap(list);
System.out.println(wordRelations);
System.out.println(wordRelations.size());
// Map<String, Long> map = aipUtilBean.extractNounWordsWithCount(text);
// System.out.print(map.toString());
......
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!