Skip to content
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation
This project
Loading...
Sign in
石磊
/
cihai
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit 31cf427b
authored
Dec 06, 2017
by
侯昆
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
关键词抽取以及搜索词抽取完成
1 parent
37e4e354
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
408 additions
and
37 deletions
serverside/cihai-core/src/main/java/com/dookay/cihai/core/CihaiCoreApplication.java
serverside/cihai-core/src/main/java/com/dookay/cihai/core/aip/AipUtilBean.java
serverside/cihai-core/src/main/java/com/dookay/cihai/core/aip/DepParserItem.java
serverside/cihai-core/src/main/java/com/dookay/cihai/core/aip/consts/DeprelConst.java
serverside/cihai-core/src/main/java/com/dookay/cihai/core/aip/consts/LexerPosConst.java
serverside/cihai-core/src/test/java/com/dookay/cihai/core/BaiduDemo.java
serverside/cihai-core/src/test/java/com/dookay/cihai/core/BaiduTest.java
serverside/cihai-core/src/test/java/com/dookay/cihai/core/HankcsTest.java
serverside/cihai-core/src/test/resources/application.properties
serverside/pom.xml
serverside/cihai-core/src/main/java/com/dookay/cihai/core/CihaiCoreApplication.java
View file @
31cf427
...
@@ -4,8 +4,6 @@ import com.dookay.coral.common.core.CoralCommonCoreMarker;
...
@@ -4,8 +4,6 @@ import com.dookay.coral.common.core.CoralCommonCoreMarker;
import
org.mybatis.spring.annotation.MapperScan
;
import
org.mybatis.spring.annotation.MapperScan
;
import
org.springframework.boot.SpringApplication
;
import
org.springframework.boot.SpringApplication
;
import
org.springframework.boot.autoconfigure.SpringBootApplication
;
import
org.springframework.boot.autoconfigure.SpringBootApplication
;
import
org.springframework.boot.builder.SpringApplicationBuilder
;
import
org.springframework.boot.web.support.SpringBootServletInitializer
;
/**
/**
...
...
serverside/cihai-core/src/main/java/com/dookay/cihai/core/aip/AipUtilBean.java
View file @
31cf427
...
@@ -16,9 +16,15 @@ package com.dookay.cihai.core.aip;
...
@@ -16,9 +16,15 @@ package com.dookay.cihai.core.aip;
import
com.alibaba.fastjson.JSON
;
import
com.alibaba.fastjson.JSON
;
import
com.baidu.aip.nlp.AipNlp
;
import
com.baidu.aip.nlp.AipNlp
;
import
com.dookay.cihai.core.aip.consts.DepPostTagConst
;
import
com.dookay.cihai.core.aip.consts.DeprelConst
;
import
com.dookay.cihai.core.aip.consts.LexerNeConst
;
import
com.dookay.cihai.core.aip.consts.LexerNeConst
;
import
com.dookay.cihai.core.aip.consts.LexerPosConst
;
import
com.dookay.cihai.core.aip.consts.LexerPosConst
;
import
com.dookay.coral.common.core.exception.ServiceException
;
import
com.dookay.coral.common.core.utils.lang.CollectionUtils
;
import
com.dookay.coral.common.core.utils.lang.StringUtils
;
import
lombok.extern.slf4j.Slf4j
;
import
lombok.extern.slf4j.Slf4j
;
import
org.apache.commons.collections4.ListUtils
;
import
org.apache.commons.collections4.set.ListOrderedSet
;
import
org.apache.commons.collections4.set.ListOrderedSet
;
import
org.json.JSONArray
;
import
org.json.JSONArray
;
import
org.json.JSONException
;
import
org.json.JSONException
;
...
@@ -26,10 +32,7 @@ import org.json.JSONObject;
...
@@ -26,10 +32,7 @@ import org.json.JSONObject;
import
org.springframework.beans.factory.annotation.Autowired
;
import
org.springframework.beans.factory.annotation.Autowired
;
import
org.springframework.stereotype.Component
;
import
org.springframework.stereotype.Component
;
import
java.util.Comparator
;
import
java.util.*
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Set
;
import
java.util.stream.Collectors
;
import
java.util.stream.Collectors
;
/**
/**
...
@@ -42,22 +45,93 @@ import java.util.stream.Collectors;
...
@@ -42,22 +45,93 @@ import java.util.stream.Collectors;
@Slf4j
@Slf4j
public
final
class
AipUtilBean
{
public
final
class
AipUtilBean
{
/**
* Error code treated as an "internal error" of the remote service.
* doLexer and doDepParser compare the response's "error_code" against this
* value and repeat the call when it matches.
*/
private
static
final
String
INTERNAL_ERROR
=
"282000"
;
@Autowired
@Autowired
private
AipNlp
aipNlp
;
private
AipNlp
aipNlp
;
/**
/**
 * Extracts the keyword of a search query from its dependency parse.
 * <p>
 * A query that parses to a single item yields that item's word. Otherwise the
 * sentence core (deprel {@code HED}) is located and every item that hangs
 * directly off the core, is its subject ({@code SBV}) or object ({@code VOB}),
 * and has a part-of-speech tag accepted by {@code DepPostTagConst.inThis} is a
 * candidate. The subject is preferred; if no candidate is a subject the first
 * candidate is returned.
 *
 * @param queryString the query text to analyse
 * @return the extracted keyword
 * @throws JSONException    if the parser response has no "items" array
 * @throws ServiceException if the parse has no core, or no candidate hangs off the core
 * @author houkun
 * @date 2017/12/6
 */
public String extractQueryKeyword(String queryString) throws JSONException {
    JSONObject res = doDepParser(queryString, 1);
    JSONArray items = res.getJSONArray("items");
    List<DepParserItem> depParserItems = JSON.parseArray(items.toString(), DepParserItem.class);

    // Single-word query: the word itself is the keyword.
    if (depParserItems.size() == 1) {
        return depParserItems.get(0).getWord();
    }

    // Locate the sentence core. Comparing constant-first avoids an NPE when an
    // item carries no deprel.
    int hedIndex = -1;
    for (int i = 0; i < depParserItems.size(); i++) {
        if (DeprelConst.HED.equals(depParserItems.get(i).getDeprel())) {
            hedIndex = i;
            break;
        }
    }
    if (hedIndex < 0) {
        throw new ServiceException("无核心");
    }

    // "head" is compared against the list index + 1, i.e. it is treated as a
    // 1-based position of the parent item.
    int hedPosition = hedIndex + 1;
    List<DepParserItem> keywords = new ArrayList<>();
    for (DepParserItem item : depParserItems) {
        boolean parentIsHed = item.getHead() == hedPosition;
        boolean isSbvOrVob = DeprelConst.SBV.equals(item.getDeprel())
                || DeprelConst.VOB.equals(item.getDeprel());
        if (parentIsHed && isSbvOrVob && DepPostTagConst.inThis(item.getPostag())) {
            keywords.add(item);
        }
    }
    if (keywords.isEmpty()) {
        throw new ServiceException("无名词宾语或主语");
    }

    // Prefer the subject; fall back to the first candidate instead of
    // dereferencing null when every candidate is an object.
    return keywords.stream()
            .filter(k -> DeprelConst.SBV.equals(k.getDeprel()))
            .findFirst()
            .orElse(keywords.get(0))
            .getWord();
}
/**
* 获取文章中名词及其出现次数
*
* @param document 文章内容
* @return
* @throws JSONException
* @author houkun
* @date 2017/12/6
*/
public
Map
<
String
,
Long
>
extractNounWordsWithCount
(
String
document
)
throws
JSONException
{
List
<
LexerItem
>
lexerItems
=
getLexerItems
(
document
);
Map
<
String
,
Long
>
itemCount
=
lexerItems
.
stream
()
.
filter
(
l
->
LexerPosConst
.
inThis
(
l
.
getPos
())
||
LexerNeConst
.
inThis
(
l
.
getNe
()))
.
collect
(
Collectors
.
groupingBy
(
LexerItem:
:
getItem
,
Collectors
.
counting
()
));
return
itemCount
;
}
/**
* 抽取关键词
* 抽取关键词
*
*
* @param document 文档
* @param document 文档
* @param size 关键词个数
* @param size 关键词个数
* @return
* @return
* @throws JSONException
* @throws JSONException
* @author houkun
* @date 2017/12/6
*/
*/
public
List
<
String
>
extractKeyWords
(
String
document
,
int
size
)
throws
JSONException
{
public
List
<
String
>
extractKeyWords
(
String
document
,
int
size
)
throws
JSONException
{
JSONObject
res
=
aipNlp
.
lexer
(
document
);
List
<
LexerItem
>
lexerItems
=
getLexerItems
(
document
);
JSONArray
items
=
res
.
getJSONArray
(
"items"
);
log
.
debug
(
items
.
toString
(
2
));
List
<
LexerItem
>
lexerItems
=
JSON
.
parseArray
(
items
.
toString
(),
LexerItem
.
class
);
Map
<
String
,
Set
<
LexerItem
>>
itemMap
=
lexerItems
.
stream
()
Map
<
String
,
Set
<
LexerItem
>>
itemMap
=
lexerItems
.
stream
()
.
filter
(
l
->
LexerPosConst
.
inThis
(
l
.
getPos
())
.
filter
(
l
->
LexerPosConst
.
inThis
(
l
.
getPos
())
||
LexerNeConst
.
inThis
(
l
.
getNe
()))
||
LexerNeConst
.
inThis
(
l
.
getNe
()))
...
@@ -79,4 +153,107 @@ public final class AipUtilBean {
...
@@ -79,4 +153,107 @@ public final class AipUtilBean {
});
});
return
words
.
stream
().
limit
(
size
).
collect
(
Collectors
.
toList
());
return
words
.
stream
().
limit
(
size
).
collect
(
Collectors
.
toList
());
}
}
/**
 * Runs lexical analysis over a whole document and returns all resulting items.
 * <p>
 * The document is first cut into segments by {@code splitDocument}; each
 * segment is analysed separately and the items are concatenated in segment order.
 *
 * @param document the document content
 * @return the lexer items of every segment, in order
 * @throws JSONException if a response has no "items" array
 * @author houkun
 * @date 2017/12/6
 */
private List<LexerItem> getLexerItems(String document) throws JSONException {
    // Segment size handed to splitDocument.
    // NOTE(review): presumably chosen to stay under the remote API's input limit - confirm.
    final int segmentSize = 10000;

    List<LexerItem> lexerItems = new ArrayList<>();
    for (String segment : splitDocument(document, segmentSize)) {
        log.debug("分段: {}", segment.length());
        log.debug(segment);
        JSONArray items = doLexer(segment).getJSONArray("items");
        // Append in place; building a fresh union list per segment recopies
        // everything collected so far.
        lexerItems.addAll(JSON.parseArray(items.toString(), LexerItem.class));
    }
    return lexerItems;
}
/**
 * Calls the remote lexical-analysis service for one piece of text.
 * <p>
 * A response carrying the {@code INTERNAL_ERROR} code is retried, but only a
 * bounded number of times, so a persistently failing service can no longer
 * cause endless recursion. Any other error code fails immediately.
 *
 * @param s the text to analyse
 * @return the service response, guaranteed to carry no "error_code"
 * @throws JSONException    if the response cannot be read
 * @throws ServiceException if the service reports a non-retryable error, or
 *                          still reports an internal error after the last attempt
 * @author houkun
 * @date 2017/12/6
 */
private JSONObject doLexer(String s) throws JSONException {
    // Upper bound on calls for one input (first call + retries).
    final int maxAttempts = 3;

    JSONObject res = aipNlp.lexer(s);
    int attempts = 1;
    while (res.has("error_code")) {
        // optString stringifies the value, so a numeric error code is handled
        // the same way as a textual one.
        String errorCode = res.optString("error_code");
        if (!INTERNAL_ERROR.equals(errorCode) || attempts >= maxAttempts) {
            throw new ServiceException(res.toString());
        }
        attempts++;
        res = aipNlp.lexer(s);
    }
    return res;
}
/**
 * Calls the remote dependency-parsing service for one piece of text.
 * <p>
 * A response carrying the {@code INTERNAL_ERROR} code is retried, but only a
 * bounded number of times, so a persistently failing service can no longer
 * cause endless recursion. Any other error code fails immediately.
 *
 * @param s    the text to parse
 * @param mode value sent to the service as the "mode" option
 * @return the service response, guaranteed to carry no "error_code"
 * @throws JSONException    if the response cannot be read
 * @throws ServiceException if the service reports a non-retryable error, or
 *                          still reports an internal error after the last attempt
 * @author houkun
 * @date 2017/12/6
 */
private JSONObject doDepParser(String s, int mode) throws JSONException {
    // Upper bound on calls for one input (first call + retries).
    final int maxAttempts = 3;

    HashMap<String, Object> option = new HashMap<>();
    option.put("mode", mode);

    JSONObject res = aipNlp.depParser(s, option);
    int attempts = 1;
    while (res.has("error_code")) {
        // optString stringifies the value, so a numeric error code is handled
        // the same way as a textual one.
        String errorCode = res.optString("error_code");
        if (!INTERNAL_ERROR.equals(errorCode) || attempts >= maxAttempts) {
            throw new ServiceException(res.toString());
        }
        attempts++;
        res = aipNlp.depParser(s, option);
    }
    return res;
}
/**
 * Splits a document into segments of at most {@code maxSize} characters.
 * <p>
 * A document that already fits is returned as a single segment. Otherwise the
 * text is cut at line breaks: consecutive lines are packed into one segment
 * (re-joined with '\n') as long as the segment stays within {@code maxSize}.
 * A single line longer than {@code maxSize} is cut into fixed-size pieces.
 * No text is discarded, except that blank lines falling at the very start of
 * a segment are not carried over.
 *
 * @param document the document content
 * @param maxSize  maximum length of each segment, in chars; must be positive
 * @return the segments in document order
 * @throws IllegalArgumentException if {@code maxSize} is not positive
 * @author houkun
 * @date 2017/12/6
 */
private List<String> splitDocument(String document, int maxSize) {
    if (maxSize <= 0) {
        throw new IllegalArgumentException("maxSize must be positive: " + maxSize);
    }
    if (document.length() <= maxSize) {
        return Collections.singletonList(document);
    }

    List<String> result = new ArrayList<>();
    StringBuilder chunk = new StringBuilder();
    // Limit -1 keeps trailing empty tokens, matching a "preserve all tokens" split.
    for (String line : document.split("\n", -1)) {
        int start = 0;
        // A line that cannot fit into any segment is cut into maxSize pieces.
        while (line.length() - start > maxSize) {
            if (chunk.length() > 0) {
                result.add(chunk.toString());
                chunk.setLength(0);
            }
            int end = start + maxSize;
            // Do not cut between the two halves of a surrogate pair.
            if (maxSize > 1 && Character.isHighSurrogate(line.charAt(end - 1))) {
                end--;
            }
            result.add(line.substring(start, end));
            start = end;
        }

        String rest = line.substring(start);
        int separator = chunk.length() > 0 ? 1 : 0;
        // Flush before appending when this line would push the segment over the limit.
        if (chunk.length() + separator + rest.length() > maxSize) {
            result.add(chunk.toString());
            chunk.setLength(0);
        }
        if (chunk.length() > 0) {
            chunk.append('\n');
        }
        chunk.append(rest);
    }
    // The last, partially filled segment must be emitted too.
    if (chunk.length() > 0) {
        result.add(chunk.toString());
    }
    return result;
}
}
}
serverside/cihai-core/src/main/java/com/dookay/cihai/core/aip/DepParserItem.java
0 → 100644
View file @
31cf427
package
com
.
dookay
.
cihai
.
core
.
aip
;
/*****************************************
* *
* @dookay.com Internet make it happen *
* ----------- ----------------------- *
* dddd ddddd Internet make it happen *
* o o o Internet make it happen *
* k k k Internet make it happen *
* a a a Internet make it happen *
* yyyy yyyyy Internet make it happen *
* ----------- ----------------------- *
* @dookay.com Internet make it happen *
* *
****************************************/
import
lombok.Data
;
/**
* One item of a dependency-parse (syntactic analysis) result.
* Accessors and other boilerplate are generated by Lombok's @Data.
*
* @author houkun
* @date 2017/12/6
*/
@Data
public
class
DepParserItem
{
/**
* Parent node of this word.
* NOTE(review): callers compare this against a list index + 1, so it looks
* like a 1-based position - confirm against the service documentation.
*/
private
int
head
;
/**
* Dependency relation of this word (see DeprelConst for the values used).
*/
private
String
deprel
;
/**
* Part-of-speech tag of this word.
*/
private
String
postag
;
/**
* The word itself.
*/
private
String
word
;
}
serverside/cihai-core/src/main/java/com/dookay/cihai/core/aip/consts/DeprelConst.java
0 → 100644
View file @
31cf427
package
com
.
dookay
.
cihai
.
core
.
aip
.
consts
;
/*****************************************
* *
* @dookay.com Internet make it happen *
* ----------- ----------------------- *
* dddd ddddd Internet make it happen *
* o o o Internet make it happen *
* k k k Internet make it happen *
* a a a Internet make it happen *
* yyyy yyyyy Internet make it happen *
* ----------- ----------------------- *
* @dookay.com Internet make it happen *
* *
****************************************/
/**
* Dependency-relation labels used when reading dependency-parse results.
*
* @author houkun
* @date 2017/12/6
*/
public
interface
DeprelConst
{
/**
* Core (head) of the sentence.
*/
String
HED
=
"HED"
;
/**
* Subject-verb relation.
*/
String
SBV
=
"SBV"
;
/**
* Verb-object relation.
*/
String
VOB
=
"VOB"
;
}
serverside/cihai-core/src/main/java/com/dookay/cihai/core/aip/consts/LexerPosConst.java
View file @
31cf427
...
@@ -50,12 +50,22 @@ public interface LexerPosConst {
...
@@ -50,12 +50,22 @@ public interface LexerPosConst {
String
NS
=
"ns"
;
String
NS
=
"ns"
;
/**
/**
* 处所名词
*/
String
S
=
"s"
;
/**
* 名形词
*/
String
AN
=
"an"
;
/**
* 是否在重要词性中
* 是否在重要词性中
*
*
* @param pos
* @param pos
* @return
* @return
*/
*/
static
boolean
inThis
(
String
pos
)
{
static
boolean
inThis
(
String
pos
)
{
return
Arrays
.
asList
(
N
,
NR
,
NZ
,
NT
,
NS
).
contains
(
pos
);
return
Arrays
.
asList
(
N
,
NR
,
NZ
,
NT
,
NS
,
S
,
AN
).
contains
(
pos
);
}
}
}
}
serverside/cihai-core/src/test/java/com/dookay/cihai/core/BaiduDemo.java
0 → 100644
View file @
31cf427
/*****************************************
* *
* @dookay.com Internet make it happen *
* ----------- ----------------------- *
* dddd ddddd Internet make it happen *
* o o o Internet make it happen *
* k k k Internet make it happen *
* a a a Internet make it happen *
* yyyy yyyyy Internet make it happen *
* ----------- ----------------------- *
* NO BUG ENJOY LIFE *
* *
****************************************/
package
com
.
dookay
.
cihai
.
core
;
import
com.baidu.aip.nlp.AipNlp
;
import
org.json.JSONObject
;
import
java.util.HashMap
;
/**
 * Manual demo: sends a few sample queries to the Baidu AIP dependency parser
 * and prints each raw JSON response.
 * <p>
 * Credentials are read from the environment variables {@code AIP_APP_ID},
 * {@code AIP_API_KEY} and {@code AIP_SECRET_KEY} so that they are not stored
 * in source control.
 *
 * @author 石磊
 * @date 2017/12/5
 */
public class BaiduDemo {
    // APPID / AK / SK, supplied through the environment.
    public static final String APP_ID = System.getenv("AIP_APP_ID");
    public static final String API_KEY = System.getenv("AIP_API_KEY");
    public static final String SECRET_KEY = System.getenv("AIP_SECRET_KEY");

    /**
     * Runs the demo.
     *
     * @param args unused
     * @throws IllegalStateException if any credential variable is missing
     * @throws Exception             if a response cannot be rendered
     */
    public static void main(String[] args) throws Exception {
        if (APP_ID == null || API_KEY == null || SECRET_KEY == null) {
            throw new IllegalStateException(
                    "Set AIP_APP_ID, AIP_API_KEY and AIP_SECRET_KEY before running this demo");
        }

        // Create the client.
        AipNlp client = new AipNlp(APP_ID, API_KEY, SECRET_KEY);

        // Optional: network timeouts.
        client.setConnectionTimeoutInMillis(2000);
        client.setSocketTimeoutInMillis(60000);

        String[] texts = {"山东是什么", "百度云登陆首页", "手机缝隙灰尘怎么清除"};

        // Dependency parsing with option "mode" = 1.
        // (Earlier experiments with lexer, simnet and wordEmbedding were removed
        // from this demo; see version control history if needed.)
        HashMap<String, Object> option = new HashMap<>();
        option.put("mode", 1);
        for (String text : texts) {
            JSONObject des = client.depParser(text, option);
            System.out.println(des.toString(2));
        }
    }
}
serverside/cihai-core/src/test/java/com/dookay/cihai/core/BaiduTest.java
View file @
31cf427
...
@@ -15,15 +15,8 @@ package com.dookay.cihai.core;
...
@@ -15,15 +15,8 @@ package com.dookay.cihai.core;
****************************************/
****************************************/
import
com.alibaba.fastjson.JSON
;
import
com.alibaba.fastjson.JSON
;
import
com.baidu.aip.nlp.AipNlp
;
import
com.dookay.cihai.core.aip.AipUtilBean
;
import
com.dookay.cihai.core.aip.AipUtilBean
;
import
com.dookay.cihai.core.aip.LexerItem
;
import
org.junit.Assert
;
import
com.dookay.cihai.core.aip.consts.LexerNeConst
;
import
com.dookay.cihai.core.aip.consts.LexerPosConst
;
import
com.hankcs.hanlp.HanLP
;
import
org.apache.commons.collections4.set.ListOrderedSet
;
import
org.json.JSONArray
;
import
org.json.JSONObject
;
import
org.junit.Test
;
import
org.junit.Test
;
import
org.springframework.beans.factory.annotation.Autowired
;
import
org.springframework.beans.factory.annotation.Autowired
;
import
org.springframework.core.io.ClassPathResource
;
import
org.springframework.core.io.ClassPathResource
;
...
@@ -32,11 +25,8 @@ import org.springframework.util.FileCopyUtils;
...
@@ -32,11 +25,8 @@ import org.springframework.util.FileCopyUtils;
import
java.io.File
;
import
java.io.File
;
import
java.io.FileReader
;
import
java.io.FileReader
;
import
java.util.Comparator
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
java.util.Set
;
import
java.util.stream.Collectors
;
/**
/**
* @author houkun
* @author houkun
...
@@ -48,17 +38,41 @@ public class BaiduTest extends CihaiCoreApplicationTests {
...
@@ -48,17 +38,41 @@ public class BaiduTest extends CihaiCoreApplicationTests {
private
AipUtilBean
aipUtilBean
;
private
AipUtilBean
aipUtilBean
;
@Test
@Test
public
void
test
()
throws
Exception
{
public
void
testQuery
()
throws
Exception
{
String
text
=
"中国共产党第十九次全国代表大会(简称党的十九大)于2017年10月18日至10月24日在北京召开。\n"
+
String
text1
=
"什么是山东"
;
"2017年10月18日上午9:00,中国共产党第十九次全国代表大会在人民大会堂开幕。习近平代表第十八届中央委员会向大会作了题为《决胜全面建成小康社会 夺取新时代中国特色社会主义伟大胜利》的报告。\n"
+
String
text2
=
"解释一下山东"
;
"这次大会的主题是:不忘初心,牢记使命,高举中国特色社会主义伟大旗帜,决胜全面建成小康社会,夺取新时代中国特色社会主义伟大胜利,为实现中华民族伟大复兴的中国梦不懈奋斗。\n"
+
String
text3
=
"山东是什么"
;
"党的十九大,是在全面建成小康社会决胜阶段、中国特色社会主义发展关键时期召开的一次十分重要的大会。承担着谋划决胜全面建成小康社会、深入推进社会主义现代化建设的重大任务,事关党和国家事业继往开来,事关中国特色社会主义前途命运,事关最广大人民根本利益。[1] \n"
+
String
text4
=
"山东是什么地方"
;
"2017年10月24日,中国共产党第十九次全国代表大会在选举产生新一届中央委员会和中央纪律检查委员会,通过关于十八届中央委员会报告的决议、关于十八届中央纪律检查委员会工作报告的决议、关于《中国共产党章程(修正案)》的决议后,在人民大会堂胜利闭幕。[2-3] "
;
String
text5
=
"山东"
;
// Resource resource = new ClassPathResource("text.txt");
// File file = resource.getFile();
String
expectResult
=
"山东"
;
// FileReader reader = new FileReader(file);
// String text = FileCopyUtils.copyToString(reader);
String
keyword1
=
aipUtilBean
.
extractQueryKeyword
(
text1
);
String
keyword2
=
aipUtilBean
.
extractQueryKeyword
(
text2
);
String
keyword3
=
aipUtilBean
.
extractQueryKeyword
(
text3
);
String
keyword4
=
aipUtilBean
.
extractQueryKeyword
(
text4
);
String
keyword5
=
aipUtilBean
.
extractQueryKeyword
(
text5
);
Assert
.
assertEquals
(
expectResult
,
keyword1
);
Assert
.
assertEquals
(
expectResult
,
keyword2
);
Assert
.
assertEquals
(
expectResult
,
keyword3
);
Assert
.
assertEquals
(
expectResult
,
keyword4
);
Assert
.
assertEquals
(
expectResult
,
keyword5
);
}
@Test
public
void
test
()
throws
Exception
{
// String text = "中国共产党第十九次全国代表大会(简称党的十九大)于2017年10月18日至10月24日在北京召开。\n" +
// "2017年10月18日上午9:00,中国共产党第十九次全国代表大会在人民大会堂开幕。习近平代表第十八届中央委员会向大会作了题为《决胜全面建成小康社会 夺取新时代中国特色社会主义伟大胜利》的报告。\n" +
// "这次大会的主题是:不忘初心,牢记使命,高举中国特色社会主义伟大旗帜,决胜全面建成小康社会,夺取新时代中国特色社会主义伟大胜利,为实现中华民族伟大复兴的中国梦不懈奋斗。\n" +
// "党的十九大,是在全面建成小康社会决胜阶段、中国特色社会主义发展关键时期召开的一次十分重要的大会。承担着谋划决胜全面建成小康社会、深入推进社会主义现代化建设的重大任务,事关党和国家事业继往开来,事关中国特色社会主义前途命运,事关最广大人民根本利益。[1] \n" +
// "2017年10月24日,中国共产党第十九次全国代表大会在选举产生新一届中央委员会和中央纪律检查委员会,通过关于十八届中央委员会报告的决议、关于十八届中央纪律检查委员会工作报告的决议、关于《中国共产党章程(修正案)》的决议后,在人民大会堂胜利闭幕。[2-3] ";
Resource
resource
=
new
ClassPathResource
(
"text.txt"
);
File
file
=
resource
.
getFile
();
FileReader
reader
=
new
FileReader
(
file
);
String
text
=
FileCopyUtils
.
copyToString
(
reader
);
List
<
String
>
list
=
aipUtilBean
.
extractKeyWords
(
text
,
10
);
List
<
String
>
list
=
aipUtilBean
.
extractKeyWords
(
text
,
10
);
System
.
out
.
println
(
JSON
.
toJSONString
(
list
));
System
.
out
.
println
(
JSON
.
toJSONString
(
list
));
Map
<
String
,
Long
>
map
=
aipUtilBean
.
extractNounWordsWithCount
(
text
);
System
.
out
.
print
(
map
.
toString
());
}
}
}
}
serverside/cihai-core/src/test/java/com/dookay/cihai/core/HankcsTest.java
View file @
31cf427
...
@@ -14,7 +14,8 @@ package com.dookay.cihai.core;
...
@@ -14,7 +14,8 @@ package com.dookay.cihai.core;
* *
* *
****************************************/
****************************************/
import
com.hankcs.hanlp.HanLP
;
import
com.alibaba.fastjson.JSON
;
import
com.dookay.coral.common.core.utils.lang.StringUtils
;
import
lombok.extern.slf4j.Slf4j
;
import
lombok.extern.slf4j.Slf4j
;
import
org.junit.Test
;
import
org.junit.Test
;
import
org.springframework.core.io.ClassPathResource
;
import
org.springframework.core.io.ClassPathResource
;
...
@@ -24,7 +25,7 @@ import org.springframework.util.FileCopyUtils;
...
@@ -24,7 +25,7 @@ import org.springframework.util.FileCopyUtils;
import
java.io.File
;
import
java.io.File
;
import
java.io.FileReader
;
import
java.io.FileReader
;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.util.List
;
import
java.util.
Array
List
;
/**
/**
* 分词测试
* 分词测试
...
@@ -41,7 +42,9 @@ public class HankcsTest {
...
@@ -41,7 +42,9 @@ public class HankcsTest {
File
file
=
resource
.
getFile
();
File
file
=
resource
.
getFile
();
FileReader
reader
=
new
FileReader
(
file
);
FileReader
reader
=
new
FileReader
(
file
);
String
document
=
FileCopyUtils
.
copyToString
(
reader
);
String
document
=
FileCopyUtils
.
copyToString
(
reader
);
List
<
String
>
strings
=
HanLP
.
extractPhrase
(
document
,
10
);
// ArrayList<String> result = splitDocument(document, 1000);
log
.
info
(
strings
.
toString
());
// log.info(String.valueOf(result.size()));
// log.info(JSON.toJSONString(result));
}
}
}
}
serverside/cihai-core/src/test/resources/application.properties
View file @
31cf427
...
@@ -18,3 +18,4 @@ mybatis.mapper-locations=classpath*:mapper/*.xml
...
@@ -18,3 +18,4 @@ mybatis.mapper-locations=classpath*:mapper/*.xml
aip.app-
id
=
10486245
aip.app-
id
=
10486245
aip.api-
key
=
ws8qdxT51xm2qbWufxzRedI3
aip.api-
key
=
ws8qdxT51xm2qbWufxzRedI3
aip.secret-
key
=
8b6g9ZyR69dFl6aqYdIOGa4IbOGgkdjh
aip.secret-
key
=
8b6g9ZyR69dFl6aqYdIOGa4IbOGgkdjh
logging.level.com.dookay.cihai.core
=
debug
serverside/pom.xml
View file @
31cf427
...
@@ -91,6 +91,6 @@
...
@@ -91,6 +91,6 @@
<modules>
<modules>
<module>
cihai-core
</module>
<module>
cihai-core
</module>
<module>
cihai-wechat
</module>
<module>
cihai-wechat
</module>
<module>
cihai-a
pp
</module>
<module>
cihai-a
dmin
</module>
</modules>
</modules>
</project>
</project>
\ No newline at end of file
\ No newline at end of file
Write
Preview
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment