GROUP BY
GROUP BY
操作要求读取的行有序,或通过临时表来缓存聚合过程的中间产物。这意味着 MySQL 可以用索引来执行 GROUP BY:
- 松散索引扫描。如果 GROUP BY 的列有索引, MySQL 可以从头到尾扫描索引,避免了产生中间产物。这是推荐的方式,因为如果没有高选择性的条件,创建的临时表可能会很大。
- 过滤行。索引可以用于确定将被存入临时表的行,之后就会在临时表中被聚合。
- 过滤和排序的组合。当用于过滤行的索引已经给数据排好序时就会应用这项优化。
例子24:使用松散索引扫描的 GROUP BY
EXPLAIN FORMAT=JSON
SELECT count(*) as c, continent FROM Country GROUP BY continent;
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "56.80"
},
"grouping_operation": {
"using_filesort": false, # 执行了排序操作
"table": {
"table_name": "Country",
"access_type": "index", # 使用索引
"possible_keys": [
"PRIMARY",
"p",
"c",
"p_c",
"c_p",
"Name"
],
"key": "c",
"used_key_parts": [
"Continent"
],
"key_length": "1",
...
}
}
}
}
例子25:使用索引,然后排序的 GROUP BY
EXPLAIN FORMAT=JSON
SELECT count(*) as c, continent FROM Country WHERE population > 500000000 GROUP BY continent;
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "3.81"
},
"grouping_operation": {
"using_temporary_table": true, # 使用了临时表
"using_filesort": true, # 执行了排序
"cost_info": {
"sort_cost": "2.00"
},
"table": {
"table_name": "Country",
"access_type": "range", # 范围访问方式
"possible_keys": [
"PRIMARY",
"p",
"c",
"p_c",
"c_p",
"Name"
],
"key": "p",
"used_key_parts": [
"Population"
],
"key_length": "4",
...
"attached_condition": "(`world`.`Country`.`Population` > 500000000)"
}
}
}
}
例子26:使用索引,然后过滤和排序的 GROUP BY
EXPLAIN FORMAT=JSON
SELECT count(*) as c, continent FROM Country WHERE continent='Asia' GROUP BY continent;
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "11.23"
},
"grouping_operation": {
"using_filesort": false, # 执行了排序
"table": {
"table_name": "Country",
"access_type": "ref", # 索引 ref 访问方式
"possible_keys": [
"PRIMARY",
"p",
"c",
"p_c",
"c_p",
"Name"
],
"key": "c",
"used_key_parts": [
"Continent"
],
"key_length": "1",
"ref": [
"const"
],
...
}
}
}
}
UNION
MySQL 并不对UNION
应用特殊的优化,而是像语意一样把两个查询结果合并起来并去重。在例子27中可以看到,去重是在一个中间临时表中进行的。使用了 UNION 的查询,其所有执行计划都会用到临时表,因此没有关于它的查询代价优化。
UNION 的简单例子:
SELECT * FROM City WHERE CountryCode = 'CAN'
UNION
SELECT * FROM City WHERE CountryCode = 'USA'
假想的优化:
SELECT * FROM City WHERE CountryCode IN ('CAN', 'USA')
子查询和视图对同一个表的多次访问可以被合并为一次访问,而 UNION 不同,MySQL 不会对它做类似的优化,也不会判断是否已无重复而把UNION
重写成UNION ALL
。许多情形被留给了熟练的优化者,通过应用或修改语句,去手动优化查询和提升性能。
例子27:使用 UNION 查询,需要用到临时表
EXPLAIN FORMAT=JSON
SELECT * FROM City WHERE CountryCode = 'CAN'
UNION
SELECT * FROM City WHERE CountryCode = 'USA'
{
"query_block": {
"union_result": {
"using_temporary_table": true, # 需要用到临时表
"table_name": "<union1,2>", # 合并两个查询的结果
"access_type": "ALL",
"query_specifications": [
{
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "58.80"
},
"table": {
"table_name": "City",
"access_type": "ref",
"possible_keys": [
"CountryCode"
],
"key": "CountryCode",
"used_key_parts": [
"CountryCode"
],
"key_length": "3",
"ref": [
"const"
],
"rows_examined_per_scan": 49,
"rows_produced_per_join": 49,
"filtered": "100.00",
"cost_info": {
"read_cost": "49.00",
"eval_cost": "9.80",
"prefix_cost": "58.80",
"data_read_per_join": "3K"
},
...
}
}
},
{
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 2,
"cost_info": {
"query_cost": "129.80"
},
"table": {
"table_name": "City",
"access_type": "ref",
"possible_keys": [
"CountryCode"
],
"key": "CountryCode",
"used_key_parts": [
"CountryCode"
],
"key_length": "3",
"ref": [
"const"
],
"rows_examined_per_scan": 274,
"rows_produced_per_join": 274,
"filtered": "100.00",
"cost_info": {
"read_cost": "75.00",
"eval_cost": "54.80",
"prefix_cost": "129.80",
"data_read_per_join": "19K"
},
...
}
}
}
]
}
}
}
UNION ALL
UNION ALL
相对UNION
只有一个不同:不会执行去重。这意味着某些情况下 MySQL 能够不使用临时表,将查询结果一并返回。
UNION ALL 查询中临时表总是会创建,是否需要使用可以在 EXPLAIN 中看到。例子28展示了添加 ORDER BY 后,UNION ALL 变得需要使用临时表。
例子28:不使用临时表的 UNION ALL
EXPLAIN FORMAT=JSON
SELECT * FROM City WHERE CountryCode = 'CAN'
UNION ALL
SELECT * FROM City WHERE CountryCode = 'USA';
{
"query_block": {
"union_result": {
"using_temporary_table": false, # 不需要使用临时表
"query_specifications": [
...
}
例子29:使用临时表的 UNION ALL,原因是有 ORDER BY
EXPLAIN FORMAT=JSON
SELECT * FROM City WHERE CountryCode = 'CAN'
UNION ALL
SELECT * FROM City WHERE CountryCode = 'USA' ORDER BY Name;
{
"query_block": {
"union_result": {
"using_temporary_table": true, # 使用临时表
"table_name": "<union1,2>",
"access_type": "ALL",
"query_specifications": [
...
}