Commit 84d7a56b by 石头

Merge remote-tracking branch 'origin/master'

2 parents c5867c98 4c01ba9a
package com.dookay.cihai.core; package com.dookay.cihai.core;
import com.dookay.coral.common.core.CoralCommonCoreMarker; import com.dookay.coral.common.core.CoralCommonCoreMarker;
import org.mybatis.spring.annotation.MapperScan;
import org.springframework.boot.SpringApplication; import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.boot.autoconfigure.SpringBootApplication;
/** /**
* 项目运行入口
*
* @author houkun * @author houkun
*/ */
@SpringBootApplication( @SpringBootApplication(scanBasePackageClasses = {
// 加载不同模块的配置与待注入的Bean
scanBasePackageClasses = {
CoralCommonCoreMarker.class, CoralCommonCoreMarker.class,
CihaiCoreApplication.class CiHaiCoreApplication.class
}) })
@MapperScan(basePackageClasses = CihaiCoreApplication.class)
public class CihaiCoreApplication { public class CiHaiCoreApplication {
public static void main(String[] args) { public static void main(String[] args) {
SpringApplication.run(CihaiCoreApplication.class, args); SpringApplication.run(CiHaiCoreApplication.class, args);
} }
} }
...@@ -53,6 +53,7 @@ public final class AipUtilBean { ...@@ -53,6 +53,7 @@ public final class AipUtilBean {
private static final ConcurrentHashMap<String, Double> SCORE_MAP = new ConcurrentHashMap<>(); private static final ConcurrentHashMap<String, Double> SCORE_MAP = new ConcurrentHashMap<>();
private static final String SCORE_KEY_PREFIX = "WORD_SCORE:"; private static final String SCORE_KEY_PREFIX = "WORD_SCORE:";
private static final double CRITICAL_VALUE = 0.4D; private static final double CRITICAL_VALUE = 0.4D;
/** /**
* 内部错误 * 内部错误
*/ */
...@@ -77,6 +78,17 @@ public final class AipUtilBean { ...@@ -77,6 +78,17 @@ public final class AipUtilBean {
* @date 2017/12/6 * @date 2017/12/6
*/ */
public String extractQueryKeyword(String queryString) throws JSONException { public String extractQueryKeyword(String queryString) throws JSONException {
// 首先尝试词法分析
List<LexerItem> lexerItems = getLexerItems(queryString);
List<LexerItem> filteredLexer = lexerItems.stream().filter(
l -> l.getPos().equals(LexerPosConst.NT) ||
l.getPos().equals(LexerPosConst.NR) ||
l.getPos().equals(LexerPosConst.NZ) ||
l.getPos().equals(LexerPosConst.NS))
.collect(Collectors.toList());
if (filteredLexer.size() == 1) {
return filteredLexer.get(0).getItem();
}
JSONObject res = doDepParser(queryString, 1); JSONObject res = doDepParser(queryString, 1);
JSONArray items = res.getJSONArray("items"); JSONArray items = res.getJSONArray("items");
List<DepParserItem> depParserItems = JSON.parseArray(items.toString(), DepParserItem.class); List<DepParserItem> depParserItems = JSON.parseArray(items.toString(), DepParserItem.class);
...@@ -84,6 +96,7 @@ public final class AipUtilBean { ...@@ -84,6 +96,7 @@ public final class AipUtilBean {
if (depParserItems.size() == 1) { if (depParserItems.size() == 1) {
return depParserItems.get(0).getWord(); return depParserItems.get(0).getWord();
} }
// 先找到句子的核心 // 先找到句子的核心
DepParserItem hed = depParserItems.stream().filter(d -> d.getDeprel().equals(DeprelConst.HED)).findFirst().orElse(null); DepParserItem hed = depParserItems.stream().filter(d -> d.getDeprel().equals(DeprelConst.HED)).findFirst().orElse(null);
List<DepParserItem> keywords = new ArrayList<>(); List<DepParserItem> keywords = new ArrayList<>();
...@@ -172,13 +185,17 @@ public final class AipUtilBean { ...@@ -172,13 +185,17 @@ public final class AipUtilBean {
Map<Pair<String, String>, Double> map = new HashMap<>(words.size()); Map<Pair<String, String>, Double> map = new HashMap<>(words.size());
words.parallelStream() words.parallelStream()
.forEach(word -> { .forEach(word -> {
double score = doSimEmbedding(keyword, word); double score = doSimnet(keyword, word);
map.put(new ImmutablePair<>(keyword, word), score); map.put(new ImmutablePair<>(keyword, word), score);
} }
); );
return map; return map;
} }
// public List<> generateWordsMap(List<String> words) {
//
// }
/** /**
* 获取词法分析项 * 获取词法分析项
...@@ -266,6 +283,11 @@ public final class AipUtilBean { ...@@ -266,6 +283,11 @@ public final class AipUtilBean {
if (StringUtils.isNotEmpty(scoreString)) { if (StringUtils.isNotEmpty(scoreString)) {
return Double.parseDouble(scoreString); return Double.parseDouble(scoreString);
} }
String key2 = getScoreKey(s2, s1);
scoreString = template.opsForValue().get(key2);
if (StringUtils.isNotEmpty(scoreString)) {
return Double.parseDouble(scoreString);
}
try { try {
JSONObject res = aipNlp.wordSimEmbedding(s1, s2); JSONObject res = aipNlp.wordSimEmbedding(s1, s2);
double score = 0; double score = 0;
...@@ -276,7 +298,6 @@ public final class AipUtilBean { ...@@ -276,7 +298,6 @@ public final class AipUtilBean {
if (INTERNAL_ERROR.equals(errorCode)) { if (INTERNAL_ERROR.equals(errorCode)) {
score = doSimEmbedding(s1, s2); score = doSimEmbedding(s1, s2);
} else { } else {
log.warn(res.toString());
score = doSimnet(s1, s2); score = doSimnet(s1, s2);
} }
} else { } else {
...@@ -300,6 +321,16 @@ public final class AipUtilBean { ...@@ -300,6 +321,16 @@ public final class AipUtilBean {
* @date 2017/12/6 * @date 2017/12/6
*/ */
private double doSimnet(String s1, String s2) { private double doSimnet(String s1, String s2) {
String key = getScoreKey(s1, s2);
String scoreString = template.opsForValue().get(key);
if (StringUtils.isNotEmpty(scoreString)) {
return Double.parseDouble(scoreString);
}
String key2 = getScoreKey(s2, s1);
scoreString = template.opsForValue().get(key2);
if (StringUtils.isNotEmpty(scoreString)) {
return Double.parseDouble(scoreString);
}
JSONObject res = aipNlp.simnet(s1, s2, new HashMap<>(0)); JSONObject res = aipNlp.simnet(s1, s2, new HashMap<>(0));
double score = 0; double score = 0;
try { try {
...@@ -314,6 +345,7 @@ public final class AipUtilBean { ...@@ -314,6 +345,7 @@ public final class AipUtilBean {
} }
} else { } else {
score = res.getDouble("score"); score = res.getDouble("score");
template.opsForValue().set(key, String.valueOf(score), 1, TimeUnit.HOURS);
} }
return score; return score;
} catch (JSONException e) { } catch (JSONException e) {
......
...@@ -50,11 +50,6 @@ public interface LexerPosConst { ...@@ -50,11 +50,6 @@ public interface LexerPosConst {
String NS = "ns"; String NS = "ns";
/** /**
* 处所名词
*/
String S = "s";
/**
* 名形词 * 名形词
*/ */
String AN = "an"; String AN = "an";
...@@ -66,6 +61,6 @@ public interface LexerPosConst { ...@@ -66,6 +61,6 @@ public interface LexerPosConst {
* @return * @return
*/ */
static boolean inThis(String pos) { static boolean inThis(String pos) {
return Arrays.asList(N, NR, NZ, NT, NS, S, AN).contains(pos); return Arrays.asList(N, NR, NZ, NT, NS, AN).contains(pos);
} }
} }
...@@ -40,23 +40,23 @@ public class BaiduTest extends CihaiCoreApplicationTests { ...@@ -40,23 +40,23 @@ public class BaiduTest extends CihaiCoreApplicationTests {
@Test @Test
public void testQuery() throws Exception { public void testQuery() throws Exception {
String text1 = "什么是山东"; String text1 = "什么是十九大";
String text2 = "解释一下山东"; String text2 = "解释一下十九大";
String text3 = "山东是什么"; // String text3 = "山东是什么";
String text4 = "山东是什么地方"; // String text4 = "山东是什么地方";
String text5 = "山东"; String text5 = "十九大";
String expectResult = "山东"; String expectResult = "十九大";
String keyword1 = aipUtilBean.extractQueryKeyword(text1); String keyword1 = aipUtilBean.extractQueryKeyword(text1);
String keyword2 = aipUtilBean.extractQueryKeyword(text2); String keyword2 = aipUtilBean.extractQueryKeyword(text2);
String keyword3 = aipUtilBean.extractQueryKeyword(text3); // String keyword3 = aipUtilBean.extractQueryKeyword(text3);
String keyword4 = aipUtilBean.extractQueryKeyword(text4); // String keyword4 = aipUtilBean.extractQueryKeyword(text4);
String keyword5 = aipUtilBean.extractQueryKeyword(text5); String keyword5 = aipUtilBean.extractQueryKeyword(text5);
Assert.assertEquals(expectResult, keyword1); Assert.assertEquals(expectResult, keyword1);
Assert.assertEquals(expectResult, keyword2); Assert.assertEquals(expectResult, keyword2);
Assert.assertEquals(expectResult, keyword3); // Assert.assertEquals(expectResult, keyword3);
Assert.assertEquals(expectResult, keyword4); // Assert.assertEquals(expectResult, keyword4);
Assert.assertEquals(expectResult, keyword5); Assert.assertEquals(expectResult, keyword5);
} }
...@@ -74,11 +74,11 @@ public class BaiduTest extends CihaiCoreApplicationTests { ...@@ -74,11 +74,11 @@ public class BaiduTest extends CihaiCoreApplicationTests {
List<String> list = aipUtilBean.extractKeyWords("中国共产党第十九次全国代表大会", text, 15); List<String> list = aipUtilBean.extractKeyWords("中国共产党第十九次全国代表大会", text, 15);
System.out.println(JSON.toJSONString(list)); System.out.println(JSON.toJSONString(list));
Map<Pair<String, String>, Double> map = aipUtilBean.calcKeywordsRelated("中国共产党第十九次全国代表大会", list); Map<Pair<String, String>, Double> map = aipUtilBean.calcKeywordsRelated("中国共产党第十九次全国代表大会", list);
System.out.println(map);
for (String s : list) { for (String s : list) {
Map<Pair<String, String>, Double> map1 = aipUtilBean.calcKeywordsRelated(s, list); Map<Pair<String, String>, Double> map1 = aipUtilBean.calcKeywordsRelated(s, list);
System.out.print(map1); System.out.println(map1);
} }
System.out.print(map);
// Map<String, Long> map = aipUtilBean.extractNounWordsWithCount(text); // Map<String, Long> map = aipUtilBean.extractNounWordsWithCount(text);
// System.out.print(map.toString()); // System.out.print(map.toString());
......
...@@ -13,7 +13,7 @@ import org.springframework.transaction.annotation.Transactional; ...@@ -13,7 +13,7 @@ import org.springframework.transaction.annotation.Transactional;
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE) @SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
@Rollback @Rollback
@Transactional @Transactional
@Import(CihaiCoreApplication.class) @Import(CiHaiCoreApplication.class)
public class CihaiCoreApplicationTests { public class CihaiCoreApplicationTests {
@Test @Test
......
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!