[个人专题目录](//www.greatytc.com/p/140e2a59db2c)
1. elasticsearch文档查询入门
DSL查询封装
/**
 * Executes a search on {@code index} with the given query, highlight and aggregation
 * settings, then logs the aggregation results, highlight fragments and raw hit sources.
 *
 * @param index              name of the index to query
 * @param queryBuilder       query DSL to execute
 * @param highlightBuilder   highlight configuration for matched fields
 * @param aggregationBuilder aggregation computed over the matched documents
 * @throws Exception if the search request fails
 */
public void builder(String index, QueryBuilder queryBuilder, HighlightBuilder highlightBuilder, AggregationBuilder aggregationBuilder) throws Exception {
    // Build the search request against the target index.
    SearchRequest searchRequest = new SearchRequest(index);
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    searchSourceBuilder.query(queryBuilder);
    searchSourceBuilder.highlighter(highlightBuilder);
    searchSourceBuilder.aggregation(aggregationBuilder);
    // Set pagination BEFORE logging the source, so the logged DSL is complete
    // (the original logged the request before from/size were applied).
    searchSourceBuilder.from(0);
    searchSourceBuilder.size(100);
    searchRequest.source(searchSourceBuilder);
    log.info("source:{}", searchRequest.source());
    SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
    logAggregations(searchResponse.getAggregations());
    logHits(searchResponse.getHits());
}

/**
 * Logs every aggregation in the response; for the known parsed aggregation types
 * the concrete value / bucket list is logged as well.
 */
private void logAggregations(Aggregations aggregations) {
    if (aggregations == null) {
        return;
    }
    // forEach already supplies each value; no need to re-read the map with get(k).
    aggregations.getAsMap().forEach((key, aggregation) -> {
        log.info("name={},type={},metaData={}", aggregation.getName(), aggregation.getType(), aggregation.getMetaData());
        log.info("value={}", aggregation);
        String type = aggregation.getType();
        // Parameterized logging: the logger has no info(long)/info(List) overloads.
        if (CardinalityAggregationBuilder.NAME.equals(type)) {
            log.info("cardinality={}", ((ParsedCardinality) aggregation).getValue());
        } else if (DateRangeAggregationBuilder.NAME.equals(type)) {
            log.info("dateRange buckets={}", ((ParsedDateRange) aggregation).getBuckets());
        } else if (HistogramAggregationBuilder.NAME.equals(type)) {
            log.info("histogram buckets={}", ((ParsedHistogram) aggregation).getBuckets());
        } else if (DateHistogramAggregationBuilder.NAME.equals(type)) {
            log.info("dateHistogram buckets={}", ((ParsedDateHistogram) aggregation).getBuckets());
        } else if (ExtendedStatsAggregationBuilder.NAME.equals(type)) {
            log.info("extendedStats sum={}", ((ParsedExtendedStats) aggregation).getSum());
        } else if (StringTerms.NAME.equals(type)) {
            log.info("stringTerms buckets={}", ((ParsedStringTerms) aggregation).getBuckets());
        } else if (RangeAggregationBuilder.NAME.equals(type)) {
            log.info("range buckets={}", ((ParsedRange) aggregation).getBuckets());
        }
    });
}

/**
 * Logs the total hit count, each hit's highlight fragments and its JSON _source.
 */
private void logHits(SearchHits searchHits) {
    // Total number of matching documents.
    log.info("count:{}", searchHits.getTotalHits().value);
    for (SearchHit hit : searchHits.getHits()) {
        // forEach supplies the highlight field directly; the extra get(k)/null-check
        // of the original is redundant.
        hit.getHighlightFields().forEach((fieldName, highlightField) -> {
            log.info(highlightField.getName());
            Text[] fragments = highlightField.getFragments();
            log.info("高亮显示结果{}", fragments[0]);
        });
        // Raw _source of the hit as a JSON string.
        log.info("查询到的数据分别是{}", hit.getSourceAsString());
    }
}
1.1 term & terms查询
term
term是代表完全匹配,也就是精确查询,搜索前不会再对搜索词进行分词,所以我们的搜索词必须是文档分词集合中的一个。数字、boolean、date这种数据类型天然支持,text需要建索引时指定为not_analyzed,才能用term query,所以尽可能还是自己去手动建立索引,指定not_analyzed吧。在最新版本的es中,不需要指定not_analyzed也可以,将type=keyword即可。
比如说我们要查找省份(province)中为“北京”的所有文档,JSON如下:
GET /book-index/_search/
{
"from": 0, #偏移量 0
"size": 5, #总共 5 条,组合为分页
"query": {
"term": {
"province": {
"value": "北京",
"boost": 1.0
}
}
}
}
#不需要指定分数的情况下
GET /book-index/_search/
{
"from": 0,
"size": 5,
"query": {
"term": {
"brandName": "三星"
}
}
}
#返回指定列
GET /book-index/_search/
{
"from": 0,
"size": 5,
"_source": ["id","title"],
"query": {
"term": {
"brandName": {
"value": "三星",
"boost": 1.0
}
}
}
}
/**
 * Runs an exact-match term query (no analysis of the search value) on the given field.
 */
public void termQuery(String index, String fieldName, String value) throws Exception {
    QueryBuilder query = QueryBuilders.termQuery(fieldName, value);
    builder(index, query);
}
terms
在查询的字段只有一个值的时候,应该使用term而不是terms,在查询字段包含多个的时候才使用terms(类似于sql中的in、or),使用terms语法.
比如说我们要查找品牌名称(brandName)为“三星”或“飞利浦”的所有文档,JSON如下:
GET /book-index/_search/
{
"from": 0,
"size": 100,
"query": {
"terms": {
"brandName": [
"三星",
"飞利浦"
],
"boost": 1
}
}
}
/**
 * Runs a terms query — the SQL-IN equivalent: matches documents whose field
 * equals any of the supplied values.
 */
public void termsQuery(String index, String fieldName, List<Object> value) throws Exception {
    QueryBuilder query = QueryBuilders.termsQuery(fieldName, value);
    builder(index, query);
}
// Exercises the exact-match term query: brandName == "三星" (Samsung).
@Test
public void testTermQuery() throws Exception {
baseQuery.termQuery(Constants.INDEX_NAME, "brandName", "三星");
}
// Exercises the terms (SQL-IN style) query: brandName is "三星" or "飞利浦".
@Test
public void testTermsQuery() throws Exception {
    List<Object> brands = new ArrayList<>(Arrays.asList("三星", "飞利浦"));
    baseQuery.termsQuery(Constants.INDEX_NAME, "brandName", brands);
}
1.2 match查询
match 或 query_string 这样的查询是高层查询,它们了解字段映射的信息:
- 如果查询 日期(date) 或 整数(integer) 字段,它们会将查询字符串分别作为日期或整数对待。
- 如果查询一个( not_analyzed )未分析的精确值字符串字段, 它们会将整个查询字符串作为单个词项对待。
- 但如果要查询一个( analyzed )已分析的全文字段, 它们会先将查询字符串传递到一个合适的分析器,然后生成一个供查询的词项列表。
一旦组成了词项列表,这个查询会对每个词项逐一执行底层的查询,再将结果合并,然后为每个文档生成一个最终的相关度评分。
match查询其实底层是多个term查询,最后将term的结果合并。
#获取分词信息
GET _analyze
{
"analyzer": "ik_smart",
"text": "施华洛世奇"
}
match_all
查询所有的文档。
JSON如下:
GET /book-index/_search/
{
"query": {
"match_all": {
"boost": 1.0
}
}
}
/**
 * Runs a match_all query: returns every document in the index.
 */
@Override
public void queryAll(String index) throws Exception {
    QueryBuilder matchAll = QueryBuilders.matchAllQuery();
    builder(index, matchAll);
}
// Exercises the match_all query (returns every document in the index).
@Test
public void testQueryAll() throws Exception {
baseQuery.queryAll(Constants.INDEX_NAME);
}
match
GET /book-index/_search/
{
"query": {
"match": {
"title": {
"query": "三星",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 1.0
}
}
}
}
GET /book-index/_search/
{
"query": {
"match": {
"title": "三星"
}
}
}
/**
 * Runs a match query: the keyword is analyzed into terms and each term is
 * searched in the given field.
 */
@Override
public void queryMatch(String index, String keyword, String field) throws Exception {
    // Operator.AND requires ALL analyzed terms to match (intersection, not union).
    builder(index, QueryBuilders.matchQuery(field, keyword).operator(Operator.AND));
}
// Exercises the analyzed match query on the title field with AND semantics.
@Test
public void testQueryMatch() throws Exception {
baseQuery.queryMatch(Constants.INDEX_NAME, "三星", "title");
}
布尔match
查询标题内容中 即有“三星”同时也有“联通”的文档:
POST /book-index/_search
{
"from": 0,
"size": 100,
"query": {
"match": {
"title": {
"query": "三星 联通",
"operator": "AND",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 1
}
}
}
}
/**
 * Runs a match query with a caller-supplied boolean operator (AND = every
 * analyzed term must match; OR = any term suffices).
 */
@Override
public void queryMatchWithOperate(String index, String field, String keyword, Operator operator) throws Exception {
    MatchQueryBuilder query = QueryBuilders.matchQuery(field, keyword);
    query.operator(operator);
    builder(index, query);
}
// Exercises the boolean match query: title must contain both "三星" and "联通".
@Test
public void testQueryMatchWithOperate() throws Exception {
baseQuery.queryMatchWithOperate(Constants.INDEX_NAME, "title", "三星 联通", Operator.AND);
}
multi_match
multi_match查询与match查询类似,不同的是它不再只作用于一个字段上,该查询通过fields参数同时作用在多个字段上。
例如:查询标题(title)与品牌名称(brandName)中含有“三星”关键字的文档:
POST /book-index/_search
{
"from": 0,
"size": 100,
"query": {
"multi_match": {
"query": "三星",
"fields": [
"brandName^1.0",
"title^1.0"
],
"type": "best_fields",
"operator": "OR",
"slop": 0,
"prefix_length": 0,
"max_expansions": 50,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"fuzzy_transpositions": true,
"boost": 1
}
}
}
/**
 * Runs a multi_match query: the analyzed keyword is searched across every
 * field named in {@code fields}.
 */
@Override
public void queryMatchMulti(String index, String keyword, String... fields) throws Exception {
    QueryBuilder multiMatch = QueryBuilders.multiMatchQuery(keyword, fields);
    builder(index, multiMatch);
}
// Exercises multi_match: "三星" searched in both brandName and title.
@Test
public void testQueryMatchMulti() throws Exception {
baseQuery.queryMatchMulti(Constants.INDEX_NAME, "三星", "brandName", "title");
}
match_phrase
对查询词语分析后构建一个短语查询,而不是一个布尔表达式。
一、什么是近似匹配
全文检索会将输入的搜索串拆解开来,去倒排索引里面一一匹配,只要能匹配上任意一个拆解后的单词,该文档就可以作为结果返回。
与之相对,phrase search 要求输入的搜索串必须在指定的字段文本中完整地、一模一样地出现,才算匹配,才能作为结果返回。
现假设有两个句子
1、java is my favourite programming language, and I also think spark is a very good big data system.
2、java spark are very related, because scala is spark's programming language and scala is also based on jvm like java.
进行match query,query语法如下:
{
"query":{
"match":{
"content":"java spark"
}
}
}
match query进行搜索,只能搜索到包含java或spark的document,包含java和spark的doc都会被返回回来。现在假如说我们要实现以下三个需求:
java spark,就靠在一起,中间不能插入任何其他字符,就要搜索出来这种doc
java spark,但是要求,java和spark两个单词靠的越近,doc的分数越高,排名越靠前
我们搜索时,文档中必须包含java和spark这两个单词,且它们之间的距离不能超过5。
要实现上述三个需求,用match做全文检索,是搞不定的,必须得用proximity match(近似匹配),proximity match分两种,短语匹配(phrase match)和近似匹配(proximity match)。
二、match_phrase的用法
phrase match,就是要去将多个term作为一个短语,一起去搜索,只有包含这个短语的doc才会作为结果返回。match是只在包含其中任何一个分词就返回。
POST /book-index/_search
{
"from": 0,
"size": 100,
"query": {
"match_phrase": {
"title": {
"query": "联通3G手机",
"slop": 0,
"zero_terms_query": "NONE",
"boost": 1
}
}
}
}
/**
 * Runs a match_phrase query: all analyzed terms must appear in the field as a
 * contiguous phrase (in order), not merely anywhere in the text.
 */
@Override
public void queryMatchPhrase(String index, String keyword, String field) throws Exception {
    QueryBuilder phrase = QueryBuilders.matchPhraseQuery(field, keyword);
    builder(index, phrase);
}
// Exercises match_phrase: title must contain the contiguous phrase "联通3G手机".
@Test
public void testQueryMatchPhrase() throws Exception {
baseQuery.queryMatchPhrase(Constants.INDEX_NAME, "联通3G手机", "title");
}
queryString
#queryString
POST /book-index/_search
{
"from": 0,
"size": 100,
"query": {
"query_string": {
"query": "三星 AND 手机",
"fields": [
"brandName^1.0",
"categoryName^1.0",
"title^1.0"
],
"type": "best_fields",
"default_operator": "and",
"max_determinized_states": 10000,
"enable_position_increments": true,
"fuzziness": "AUTO",
"fuzzy_prefix_length": 0,
"fuzzy_max_expansions": 50,
"phrase_slop": 0,
"escape": false,
"auto_generate_synonyms_phrase_query": true,
"fuzzy_transpositions": true,
"boost": 1
}
}
}
/**
 * Runs a query_string query: Lucene query syntax ("三星 AND 手机") evaluated
 * across the title, categoryName and brandName fields, with AND as the
 * default operator between terms.
 */
@Override
public void queryStringQuery(String index) throws Exception {
    QueryStringQueryBuilder query = QueryBuilders.queryStringQuery("三星 AND 手机");
    query.field("title");
    query.field("categoryName");
    query.field("brandName");
    query.defaultOperator(Operator.AND);
    builder(index, query);
}