Skip to content
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation
This project
Loading...
Sign in
石磊
/
cihai
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit efff46e6
authored
Dec 07, 2017
by
侯昆
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
关键词搜索首先尝试词法分析
1 parent
ceef850a
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
47 additions
and
20 deletions
serverside/cihai-core/src/main/java/com/dookay/cihai/core/aip/AipUtilBean.java
serverside/cihai-core/src/main/java/com/dookay/cihai/core/aip/consts/LexerPosConst.java
serverside/cihai-core/src/test/java/com/dookay/cihai/core/BaiduTest.java
serverside/cihai-core/src/main/java/com/dookay/cihai/core/aip/AipUtilBean.java
View file @
efff46e
...
@@ -53,6 +53,7 @@ public final class AipUtilBean {
...
@@ -53,6 +53,7 @@ public final class AipUtilBean {
private
static
final
ConcurrentHashMap
<
String
,
Double
>
SCORE_MAP
=
new
ConcurrentHashMap
<>();
private
static
final
ConcurrentHashMap
<
String
,
Double
>
SCORE_MAP
=
new
ConcurrentHashMap
<>();
private
static
final
String
SCORE_KEY_PREFIX
=
"WORD_SCORE:"
;
private
static
final
String
SCORE_KEY_PREFIX
=
"WORD_SCORE:"
;
private
static
final
double
CRITICAL_VALUE
=
0.4
D
;
private
static
final
double
CRITICAL_VALUE
=
0.4
D
;
/**
/**
* 内部错误
* 内部错误
*/
*/
...
@@ -77,6 +78,17 @@ public final class AipUtilBean {
...
@@ -77,6 +78,17 @@ public final class AipUtilBean {
* @date 2017/12/6
* @date 2017/12/6
*/
*/
public
String
extractQueryKeyword
(
String
queryString
)
throws
JSONException
{
public
String
extractQueryKeyword
(
String
queryString
)
throws
JSONException
{
// 首先尝试词法分析
List
<
LexerItem
>
lexerItems
=
getLexerItems
(
queryString
);
List
<
LexerItem
>
filteredLexer
=
lexerItems
.
stream
().
filter
(
l
->
l
.
getPos
().
equals
(
LexerPosConst
.
NT
)
||
l
.
getPos
().
equals
(
LexerPosConst
.
NR
)
||
l
.
getPos
().
equals
(
LexerPosConst
.
NZ
)
||
l
.
getPos
().
equals
(
LexerPosConst
.
NS
))
.
collect
(
Collectors
.
toList
());
if
(
filteredLexer
.
size
()
==
1
)
{
return
filteredLexer
.
get
(
0
).
getItem
();
}
JSONObject
res
=
doDepParser
(
queryString
,
1
);
JSONObject
res
=
doDepParser
(
queryString
,
1
);
JSONArray
items
=
res
.
getJSONArray
(
"items"
);
JSONArray
items
=
res
.
getJSONArray
(
"items"
);
List
<
DepParserItem
>
depParserItems
=
JSON
.
parseArray
(
items
.
toString
(),
DepParserItem
.
class
);
List
<
DepParserItem
>
depParserItems
=
JSON
.
parseArray
(
items
.
toString
(),
DepParserItem
.
class
);
...
@@ -84,6 +96,7 @@ public final class AipUtilBean {
...
@@ -84,6 +96,7 @@ public final class AipUtilBean {
if
(
depParserItems
.
size
()
==
1
)
{
if
(
depParserItems
.
size
()
==
1
)
{
return
depParserItems
.
get
(
0
).
getWord
();
return
depParserItems
.
get
(
0
).
getWord
();
}
}
// 先找到句子的核心
// 先找到句子的核心
DepParserItem
hed
=
depParserItems
.
stream
().
filter
(
d
->
d
.
getDeprel
().
equals
(
DeprelConst
.
HED
)).
findFirst
().
orElse
(
null
);
DepParserItem
hed
=
depParserItems
.
stream
().
filter
(
d
->
d
.
getDeprel
().
equals
(
DeprelConst
.
HED
)).
findFirst
().
orElse
(
null
);
List
<
DepParserItem
>
keywords
=
new
ArrayList
<>();
List
<
DepParserItem
>
keywords
=
new
ArrayList
<>();
...
@@ -172,13 +185,17 @@ public final class AipUtilBean {
...
@@ -172,13 +185,17 @@ public final class AipUtilBean {
Map
<
Pair
<
String
,
String
>,
Double
>
map
=
new
HashMap
<>(
words
.
size
());
Map
<
Pair
<
String
,
String
>,
Double
>
map
=
new
HashMap
<>(
words
.
size
());
words
.
parallelStream
()
words
.
parallelStream
()
.
forEach
(
word
->
{
.
forEach
(
word
->
{
double
score
=
doSim
Embedding
(
keyword
,
word
);
double
score
=
doSim
net
(
keyword
,
word
);
map
.
put
(
new
ImmutablePair
<>(
keyword
,
word
),
score
);
map
.
put
(
new
ImmutablePair
<>(
keyword
,
word
),
score
);
}
}
);
);
return
map
;
return
map
;
}
}
// public List<> generateWordsMap(List<String> words) {
//
// }
/**
/**
* 获取词法分析项
* 获取词法分析项
...
@@ -266,6 +283,11 @@ public final class AipUtilBean {
...
@@ -266,6 +283,11 @@ public final class AipUtilBean {
if
(
StringUtils
.
isNotEmpty
(
scoreString
))
{
if
(
StringUtils
.
isNotEmpty
(
scoreString
))
{
return
Double
.
parseDouble
(
scoreString
);
return
Double
.
parseDouble
(
scoreString
);
}
}
String
key2
=
getScoreKey
(
s2
,
s1
);
scoreString
=
template
.
opsForValue
().
get
(
key2
);
if
(
StringUtils
.
isNotEmpty
(
scoreString
))
{
return
Double
.
parseDouble
(
scoreString
);
}
try
{
try
{
JSONObject
res
=
aipNlp
.
wordSimEmbedding
(
s1
,
s2
);
JSONObject
res
=
aipNlp
.
wordSimEmbedding
(
s1
,
s2
);
double
score
=
0
;
double
score
=
0
;
...
@@ -276,7 +298,6 @@ public final class AipUtilBean {
...
@@ -276,7 +298,6 @@ public final class AipUtilBean {
if
(
INTERNAL_ERROR
.
equals
(
errorCode
))
{
if
(
INTERNAL_ERROR
.
equals
(
errorCode
))
{
score
=
doSimEmbedding
(
s1
,
s2
);
score
=
doSimEmbedding
(
s1
,
s2
);
}
else
{
}
else
{
log
.
warn
(
res
.
toString
());
score
=
doSimnet
(
s1
,
s2
);
score
=
doSimnet
(
s1
,
s2
);
}
}
}
else
{
}
else
{
...
@@ -300,6 +321,16 @@ public final class AipUtilBean {
...
@@ -300,6 +321,16 @@ public final class AipUtilBean {
* @date 2017/12/6
* @date 2017/12/6
*/
*/
private
double
doSimnet
(
String
s1
,
String
s2
)
{
private
double
doSimnet
(
String
s1
,
String
s2
)
{
String
key
=
getScoreKey
(
s1
,
s2
);
String
scoreString
=
template
.
opsForValue
().
get
(
key
);
if
(
StringUtils
.
isNotEmpty
(
scoreString
))
{
return
Double
.
parseDouble
(
scoreString
);
}
String
key2
=
getScoreKey
(
s2
,
s1
);
scoreString
=
template
.
opsForValue
().
get
(
key2
);
if
(
StringUtils
.
isNotEmpty
(
scoreString
))
{
return
Double
.
parseDouble
(
scoreString
);
}
JSONObject
res
=
aipNlp
.
simnet
(
s1
,
s2
,
new
HashMap
<>(
0
));
JSONObject
res
=
aipNlp
.
simnet
(
s1
,
s2
,
new
HashMap
<>(
0
));
double
score
=
0
;
double
score
=
0
;
try
{
try
{
...
@@ -314,6 +345,7 @@ public final class AipUtilBean {
...
@@ -314,6 +345,7 @@ public final class AipUtilBean {
}
}
}
else
{
}
else
{
score
=
res
.
getDouble
(
"score"
);
score
=
res
.
getDouble
(
"score"
);
template
.
opsForValue
().
set
(
key
,
String
.
valueOf
(
score
),
1
,
TimeUnit
.
HOURS
);
}
}
return
score
;
return
score
;
}
catch
(
JSONException
e
)
{
}
catch
(
JSONException
e
)
{
...
...
serverside/cihai-core/src/main/java/com/dookay/cihai/core/aip/consts/LexerPosConst.java
View file @
efff46e
...
@@ -50,11 +50,6 @@ public interface LexerPosConst {
...
@@ -50,11 +50,6 @@ public interface LexerPosConst {
String
NS
=
"ns"
;
String
NS
=
"ns"
;
/**
/**
* 处所名词
*/
String
S
=
"s"
;
/**
* 名形词
* 名形词
*/
*/
String
AN
=
"an"
;
String
AN
=
"an"
;
...
@@ -66,6 +61,6 @@ public interface LexerPosConst {
...
@@ -66,6 +61,6 @@ public interface LexerPosConst {
* @return
* @return
*/
*/
static
boolean
inThis
(
String
pos
)
{
static
boolean
inThis
(
String
pos
)
{
return
Arrays
.
asList
(
N
,
NR
,
NZ
,
NT
,
NS
,
S
,
AN
).
contains
(
pos
);
return
Arrays
.
asList
(
N
,
NR
,
NZ
,
NT
,
NS
,
AN
).
contains
(
pos
);
}
}
}
}
serverside/cihai-core/src/test/java/com/dookay/cihai/core/BaiduTest.java
View file @
efff46e
...
@@ -40,23 +40,23 @@ public class BaiduTest extends CihaiCoreApplicationTests {
...
@@ -40,23 +40,23 @@ public class BaiduTest extends CihaiCoreApplicationTests {
@Test
@Test
public
void
testQuery
()
throws
Exception
{
public
void
testQuery
()
throws
Exception
{
String
text1
=
"什么是
山东
"
;
String
text1
=
"什么是
十九大
"
;
String
text2
=
"解释一下
山东
"
;
String
text2
=
"解释一下
十九大
"
;
String
text3
=
"山东是什么"
;
//
String text3 = "山东是什么";
String
text4
=
"山东是什么地方"
;
//
String text4 = "山东是什么地方";
String
text5
=
"
山东
"
;
String
text5
=
"
十九大
"
;
String
expectResult
=
"
山东
"
;
String
expectResult
=
"
十九大
"
;
String
keyword1
=
aipUtilBean
.
extractQueryKeyword
(
text1
);
String
keyword1
=
aipUtilBean
.
extractQueryKeyword
(
text1
);
String
keyword2
=
aipUtilBean
.
extractQueryKeyword
(
text2
);
String
keyword2
=
aipUtilBean
.
extractQueryKeyword
(
text2
);
String
keyword3
=
aipUtilBean
.
extractQueryKeyword
(
text3
);
//
String keyword3 = aipUtilBean.extractQueryKeyword(text3);
String
keyword4
=
aipUtilBean
.
extractQueryKeyword
(
text4
);
//
String keyword4 = aipUtilBean.extractQueryKeyword(text4);
String
keyword5
=
aipUtilBean
.
extractQueryKeyword
(
text5
);
String
keyword5
=
aipUtilBean
.
extractQueryKeyword
(
text5
);
Assert
.
assertEquals
(
expectResult
,
keyword1
);
Assert
.
assertEquals
(
expectResult
,
keyword1
);
Assert
.
assertEquals
(
expectResult
,
keyword2
);
Assert
.
assertEquals
(
expectResult
,
keyword2
);
Assert
.
assertEquals
(
expectResult
,
keyword3
);
//
Assert.assertEquals(expectResult, keyword3);
Assert
.
assertEquals
(
expectResult
,
keyword4
);
//
Assert.assertEquals(expectResult, keyword4);
Assert
.
assertEquals
(
expectResult
,
keyword5
);
Assert
.
assertEquals
(
expectResult
,
keyword5
);
}
}
...
@@ -74,11 +74,11 @@ public class BaiduTest extends CihaiCoreApplicationTests {
...
@@ -74,11 +74,11 @@ public class BaiduTest extends CihaiCoreApplicationTests {
List
<
String
>
list
=
aipUtilBean
.
extractKeyWords
(
"中国共产党第十九次全国代表大会"
,
text
,
15
);
List
<
String
>
list
=
aipUtilBean
.
extractKeyWords
(
"中国共产党第十九次全国代表大会"
,
text
,
15
);
System
.
out
.
println
(
JSON
.
toJSONString
(
list
));
System
.
out
.
println
(
JSON
.
toJSONString
(
list
));
Map
<
Pair
<
String
,
String
>,
Double
>
map
=
aipUtilBean
.
calcKeywordsRelated
(
"中国共产党第十九次全国代表大会"
,
list
);
Map
<
Pair
<
String
,
String
>,
Double
>
map
=
aipUtilBean
.
calcKeywordsRelated
(
"中国共产党第十九次全国代表大会"
,
list
);
System
.
out
.
println
(
map
);
for
(
String
s
:
list
)
{
for
(
String
s
:
list
)
{
Map
<
Pair
<
String
,
String
>,
Double
>
map1
=
aipUtilBean
.
calcKeywordsRelated
(
s
,
list
);
Map
<
Pair
<
String
,
String
>,
Double
>
map1
=
aipUtilBean
.
calcKeywordsRelated
(
s
,
list
);
System
.
out
.
print
(
map1
);
System
.
out
.
print
ln
(
map1
);
}
}
System
.
out
.
print
(
map
);
// Map<String, Long> map = aipUtilBean.extractNounWordsWithCount(text);
// Map<String, Long> map = aipUtilBean.extractNounWordsWithCount(text);
// System.out.print(map.toString());
// System.out.print(map.toString());
...
...
Write
Preview
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment