Description:
When use temptable engine to build tmp table, it may lead to performance degradation for IN-Subs.
How to repeat:
Repeat as follows:
```
create table trade (
`trade_id` bigint NOT NULL AUTO_INCREMENT,
`seller_id` bigint DEFAULT NULL,
`create_time` datetime DEFAULT NULL,
`pay_time` datetime DEFAULT NULL,
`doudian_open_id` varchar(256) DEFAULT NULL,
PRIMARY KEY (`trade_id`),
KEY `pay_time` (`pay_time`),
KEY `seller_id` (`seller_id`,`create_time`),
KEY `idx_doudian_open_id` (`doudian_open_id`));
-- t1_in_range:
CREATE TABLE t1_in_range like trade;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:29:26', '2023-11-15 20:29:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:29:27', '2023-11-15 20:29:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:29:27', '2023-11-15 20:29:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
--t1_not_in_range:
CREATE TABLE t1_not_in_range like trade;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:29:26', '2023-11-15 20:29:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:29:27', '2023-11-15 20:29:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:29:27', '2023-11-15 20:29:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
create table trade_000001(
`trade_id` bigint NOT NULL AUTO_INCREMENT,
`seller_id` bigint DEFAULT NULL,
`create_time` datetime DEFAULT NULL,
`pay_time` datetime DEFAULT NULL,
`doudian_open_id` varchar(256) DEFAULT NULL,
PRIMARY KEY (`trade_id`),
KEY `pay_time` (`pay_time`),
KEY `seller_id` (`seller_id`,`create_time`),
KEY `idx_doudian_open_id` (`doudian_open_id`));
INSERT INTO trade_000001 (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, doudian_open_id from t1_in_range;
INSERT INTO trade_000001 (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, doudian_open_id from t1_in_range;
INSERT INTO trade_000001 (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, doudian_open_id from t1_not_in_range;
INSERT INTO trade_000001 (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, doudian_open_id from t1_not_in_range;
create table shop_buyer_000001(
`shop_buyer_id` bigint NOT NULL AUTO_INCREMENT,
`seller_id` bigint DEFAULT NULL,
`buyer_frist_pay_time` datetime DEFAULT NULL,
`doudian_open_id` varchar(256) DEFAULT NULL,
PRIMARY KEY (`shop_buyer_id`),
KEY `idx_doudian_open_id` (`doudian_open_id`)
);
INSERT INTO shop_buyer_000001 (seller_id, buyer_frist_pay_time, doudian_open_id) SELECT seller_id, pay_time, doudian_open_id from t1_in_range;
INSERT INTO shop_buyer_000001 (seller_id, buyer_frist_pay_time, doudian_open_id) SELECT seller_id, create_time, doudian_open_id from t1_not_in_range;
mysql> show variables like '%tmp%';
+---------------------------------+----------------------------+
| Variable_name | Value |
+---------------------------------+----------------------------+
| default_tmp_storage_engine | InnoDB |
| innodb_tmpdir | |
| internal_tmp_mem_storage_engine | TempTable |
| replica_load_tmpdir | /home/xychen/tools/run/tmp |
| slave_load_tmpdir | /home/xychen/tools/run/tmp |
| tmp_table_size | 1073741824 |
| tmpdir | /home/xychen/tools/run/tmp |
+---------------------------------+----------------------------+
7 rows in set (0.00 sec)
mysql>
mysql> set tmp_table_size = 64 * 16777216;
Query OK, 0 rows affected (0.00 sec)
mysql> SELECT
-> count( DISTINCT doudian_open_id )
-> FROM
-> (
-> SELECT
-> doudian_open_id
-> FROM
-> (
-> (
-> SELECT
-> buyer.doudian_open_id AS doudian_open_id
-> FROM
-> trade_000001 trade,
-> shop_buyer_000001 buyer
-> WHERE
-> trade.doudian_open_id = buyer.doudian_open_id
-> AND trade.seller_id = 6656
-> AND buyer.seller_id = 6656
-> AND buyer.buyer_frist_pay_time >= '2023-01-01 00:00:00'
-> AND buyer.buyer_frist_pay_time <= '2023-11-30 23:59:59' AND trade.doudian_open_id IN (
-> SELECT doudian_open_id FROM trade_000001 trade WHERE seller_id = 6656 AND trade.pay_time >= '2022-11-14'
-> GROUP BY
-> trade.doudian_open_id
-> HAVING
-> count( 1 ) >= 1
-> )
-> )
-> ) x2
-> ) tmp;
+-----------------------------------+
| count( DISTINCT doudian_open_id ) |
+-----------------------------------+
| 524288 |
+-----------------------------------+
1 row in set (5 min 24.07 sec)
mysql> set internal_tmp_mem_storage_engine = MEMORY;
Query OK, 0 rows affected (0.00 sec)
mysql>
mysql> SELECT
-> count( DISTINCT doudian_open_id )
-> FROM
-> (
-> SELECT
-> doudian_open_id
-> FROM
-> (
-> (
-> SELECT
-> buyer.doudian_open_id AS doudian_open_id
-> FROM
-> trade_000001 trade,
-> shop_buyer_000001 buyer
-> WHERE
-> trade.doudian_open_id = buyer.doudian_open_id
-> AND trade.seller_id = 6656
-> AND buyer.seller_id = 6656
-> AND buyer.buyer_frist_pay_time >= '2023-01-01 00:00:00'
-> AND buyer.buyer_frist_pay_time <= '2023-11-30 23:59:59' AND trade.doudian_open_id IN (
-> SELECT doudian_open_id FROM trade_000001 trade WHERE seller_id = 6656 AND trade.pay_time >= '2022-11-14'
-> GROUP BY
-> trade.doudian_open_id
-> HAVING
-> count( 1 ) >= 1
-> )
-> )
-> ) x2
-> ) tmp;
+-----------------------------------+
| count( DISTINCT doudian_open_id ) |
+-----------------------------------+
| 524288 |
+-----------------------------------+
1 row in set (1 min 8.43 sec)
```
Suggested fix:
Cleary, the MEMORY (ie., ha_heap) engine is faster than Temptable engine. However, with the perf data we find that "memset" wastes too may cpus.
```
+ 95.11% 0.00% connection mysqld [.] mysql_execute_command
+ 95.11% 0.00% connection mysqld [.] Sql_cmd_dml::execute
+ 95.06% 0.00% connection mysqld [.] Query_expression::execute
+ 95.06% 0.00% connection mysqld [.] Query_expression::ExecuteIteratorQuery
+ 94.85% 0.01% connection mysqld [.] AggregateIterator::Read
+ 93.81% 0.01% connection mysqld [.] NestedLoopIterator::Read
+ 93.79% 0.01% connection mysqld [.] FilterIterator::Read
+ 91.57% 0.02% connection mysqld [.] Item_cond_and::val_int
+ 91.57% 0.02% connection mysqld [.] Item::val_bool
+ 91.49% 0.00% connection mysqld [.] Item_in_optimizer::val_int
+ 91.47% 0.01% connection mysqld [.] Item_in_subselect::val_bool_naked
+ 91.30% 0.01% connection mysqld [.] Item_subselect::exec
+ 91.29% 0.00% connection mysqld [.] subselect_hash_sj_engine::exec
+ 91.28% 0.02% connection mysqld [.] ExecuteExistsQuery
+ 90.82% 0.01% connection mysqld [.] TABLE::empty_result_table
+ 90.81% 0.00% connection mysqld [.] Query_expression::ClearForExecution
+ 90.80% 0.01% connection mysqld [.] temptable::Handler::delete_all_rows
+ 90.79% 0.01% connection mysqld [.] JOIN::reset
+ 90.72% 90.72% connection libc-2.24.so [.] __memset_avx2_erms
+ 2.32% 0.03% connection mysqld [.] RefIterator<false>::Read
+ 1.92% 0.12% connection mysqld [.] row_search_mvcc
+ 1.87% 0.01% connection mysqld [.] handler::ha_index_read_map
+ 1.65% 0.01% connection mysqld [.] ha_innobase::index_read
```
Check the source code, we find that
```
inline void Table::truncate() {
if (!m_all_columns_are_fixed_size) {
for (auto element : m_rows) {
Row *row = static_cast<Row *>(element);
row->~Row();
}
}
m_rows.clear();
/* Truncate indexes even if `m_indexes_are_enabled` is false. Somebody may
* use truncate() before enabling indexes and we don't want an empty m_rows
* with some stale data inside the indexes. */
for (auto &entry : m_index_entries) {
entry.m_index->truncate();
}
}
void Hash_unique::truncate() { m_hash_table.clear(); }
```
m_hash_table.clear() is called too many times.
Just fix:
```
diff --git a/storage/temptable/src/index.cc b/storage/temptable/src/index.cc
index a39e3d4f8..20455a25d 100644
--- a/storage/temptable/src/index.cc
+++ b/storage/temptable/src/index.cc
@@ -264,7 +264,9 @@ void Hash_unique::erase(const Cursor &target) {
m_hash_table.erase(target.hash_iterator());
}
-void Hash_unique::truncate() { m_hash_table.clear(); }
+void Hash_unique::truncate() {
+ if (!m_hash_table.empty()) m_hash_table.clear();
+}
```
With this patch, the execution time is reduced to 17s.
```
mysql> SELECT
-> count( DISTINCT doudian_open_id )
-> FROM
-> (
-> SELECT
-> doudian_open_id
-> FROM
-> (
-> (
-> SELECT
-> buyer.doudian_open_id AS doudian_open_id
-> FROM
-> trade_000001 trade,
-> shop_buyer_000001 buyer
-> WHERE
-> trade.doudian_open_id = buyer.doudian_open_id
-> AND trade.seller_id = 6656
-> AND buyer.seller_id = 6656
-> AND buyer.buyer_frist_pay_time >= '2023-01-01 00:00:00'
-> AND buyer.buyer_frist_pay_time <= '2023-11-30 23:59:59' AND trade.doudian_open_id IN (
-> SELECT doudian_open_id FROM trade_000001 trade WHERE seller_id = 6656 AND trade.pay_time >= '2022-11-14'
-> GROUP BY
-> trade.doudian_open_id
-> HAVING
-> count( 1 ) >= 1
-> )
-> )
-> ) x2
-> ) tmp;
+-----------------------------------+
| count( DISTINCT doudian_open_id ) |
+-----------------------------------+
| 524288 |
+-----------------------------------+
1 row in set (17.16s)
```
Description: When use temptable engine to build tmp table, it may lead to performance degradation for IN-Subs. How to repeat: Repeat as follows: ``` create table trade ( `trade_id` bigint NOT NULL AUTO_INCREMENT, `seller_id` bigint DEFAULT NULL, `create_time` datetime DEFAULT NULL, `pay_time` datetime DEFAULT NULL, `doudian_open_id` varchar(256) DEFAULT NULL, PRIMARY KEY (`trade_id`), KEY `pay_time` (`pay_time`), KEY `seller_id` (`seller_id`,`create_time`), KEY `idx_doudian_open_id` (`doudian_open_id`)); -- t1_in_range: CREATE TABLE t1_in_range like trade; INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:29:26', '2023-11-15 20:29:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72))); INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:29:27', '2023-11-15 20:29:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72))); INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:29:27', '2023-11-15 20:29:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72))); INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72))); INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72))); INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72))); INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72))); INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72))); INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range; INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range; INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range; INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range; INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range; INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range; INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range; INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range; INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range; INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range; INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range; INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range; INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range; INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range; INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range; INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range; --t1_not_in_range: CREATE TABLE t1_not_in_range like trade; INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:29:26', '2023-11-15 20:29:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72))); INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:29:27', '2023-11-15 20:29:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72))); INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:29:27', '2023-11-15 20:29:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72))); INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72))); INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72))); INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72))); INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72))); INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72))); INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range; INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range; INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range; INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range; INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range; INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range; INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range; INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range; INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range; INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range; INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range; INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range; INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range; INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range; INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range; INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range; create table trade_000001( `trade_id` bigint NOT NULL AUTO_INCREMENT, `seller_id` bigint DEFAULT NULL, `create_time` datetime DEFAULT NULL, `pay_time` datetime DEFAULT NULL, `doudian_open_id` varchar(256) DEFAULT NULL, PRIMARY KEY (`trade_id`), KEY `pay_time` (`pay_time`), KEY `seller_id` (`seller_id`,`create_time`), KEY `idx_doudian_open_id` (`doudian_open_id`)); INSERT INTO trade_000001 (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, doudian_open_id from t1_in_range; INSERT INTO trade_000001 (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, doudian_open_id from t1_in_range; INSERT INTO trade_000001 (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, doudian_open_id from t1_not_in_range; INSERT INTO trade_000001 (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, doudian_open_id from t1_not_in_range; create table shop_buyer_000001( `shop_buyer_id` bigint NOT NULL AUTO_INCREMENT, `seller_id` bigint DEFAULT NULL, `buyer_frist_pay_time` datetime DEFAULT NULL, `doudian_open_id` varchar(256) DEFAULT NULL, PRIMARY KEY (`shop_buyer_id`), KEY `idx_doudian_open_id` (`doudian_open_id`) ); INSERT INTO shop_buyer_000001 (seller_id, buyer_frist_pay_time, doudian_open_id) SELECT seller_id, pay_time, doudian_open_id from t1_in_range; INSERT INTO shop_buyer_000001 (seller_id, buyer_frist_pay_time, doudian_open_id) SELECT seller_id, create_time, doudian_open_id from t1_not_in_range; mysql> show variables like '%tmp%'; +---------------------------------+----------------------------+ | Variable_name | Value | +---------------------------------+----------------------------+ | default_tmp_storage_engine | InnoDB | | innodb_tmpdir | | | internal_tmp_mem_storage_engine | TempTable | | replica_load_tmpdir | /home/xychen/tools/run/tmp | | slave_load_tmpdir | /home/xychen/tools/run/tmp | | tmp_table_size | 1073741824 | | tmpdir | /home/xychen/tools/run/tmp | +---------------------------------+----------------------------+ 7 rows in set (0.00 sec) mysql> mysql> set tmp_table_size = 64 * 16777216; Query OK, 0 rows affected (0.00 sec) mysql> SELECT -> count( DISTINCT doudian_open_id ) -> FROM -> ( -> SELECT -> doudian_open_id -> FROM -> ( -> ( -> SELECT -> buyer.doudian_open_id AS doudian_open_id -> FROM -> trade_000001 trade, -> shop_buyer_000001 buyer -> WHERE -> trade.doudian_open_id = buyer.doudian_open_id -> AND trade.seller_id = 6656 -> AND buyer.seller_id = 6656 -> AND buyer.buyer_frist_pay_time >= '2023-01-01 00:00:00' -> AND buyer.buyer_frist_pay_time <= '2023-11-30 23:59:59' AND trade.doudian_open_id IN ( -> SELECT doudian_open_id FROM trade_000001 trade WHERE seller_id = 6656 AND trade.pay_time >= '2022-11-14' -> GROUP BY -> trade.doudian_open_id -> HAVING -> count( 1 ) >= 1 -> ) -> ) -> ) x2 -> ) tmp; +-----------------------------------+ | count( DISTINCT doudian_open_id ) | +-----------------------------------+ | 524288 | +-----------------------------------+ 1 row in set (5 min 24.07 sec) mysql> set internal_tmp_mem_storage_engine = MEMORY; Query OK, 0 rows affected (0.00 sec) mysql> mysql> SELECT -> count( DISTINCT doudian_open_id ) -> FROM -> ( -> SELECT -> doudian_open_id -> FROM -> ( -> ( -> SELECT -> buyer.doudian_open_id AS doudian_open_id -> FROM -> trade_000001 trade, -> shop_buyer_000001 buyer -> WHERE -> trade.doudian_open_id = buyer.doudian_open_id -> AND trade.seller_id = 6656 -> AND buyer.seller_id = 6656 -> AND buyer.buyer_frist_pay_time >= '2023-01-01 00:00:00' -> AND buyer.buyer_frist_pay_time <= '2023-11-30 23:59:59' AND trade.doudian_open_id IN ( -> SELECT doudian_open_id FROM trade_000001 trade WHERE seller_id = 6656 AND trade.pay_time >= '2022-11-14' -> GROUP BY -> trade.doudian_open_id -> HAVING -> count( 1 ) >= 1 -> ) -> ) -> ) x2 -> ) tmp; +-----------------------------------+ | count( DISTINCT doudian_open_id ) | +-----------------------------------+ | 524288 | +-----------------------------------+ 1 row in set (1 min 8.43 sec) ``` Suggested fix: Cleary, the MEMORY (ie., ha_heap) engine is faster than Temptable engine. However, with the perf data we find that "memset" wastes too may cpus. ``` + 95.11% 0.00% connection mysqld [.] mysql_execute_command + 95.11% 0.00% connection mysqld [.] Sql_cmd_dml::execute + 95.06% 0.00% connection mysqld [.] Query_expression::execute + 95.06% 0.00% connection mysqld [.] Query_expression::ExecuteIteratorQuery + 94.85% 0.01% connection mysqld [.] AggregateIterator::Read + 93.81% 0.01% connection mysqld [.] NestedLoopIterator::Read + 93.79% 0.01% connection mysqld [.] FilterIterator::Read + 91.57% 0.02% connection mysqld [.] Item_cond_and::val_int + 91.57% 0.02% connection mysqld [.] Item::val_bool + 91.49% 0.00% connection mysqld [.] Item_in_optimizer::val_int + 91.47% 0.01% connection mysqld [.] Item_in_subselect::val_bool_naked + 91.30% 0.01% connection mysqld [.] Item_subselect::exec + 91.29% 0.00% connection mysqld [.] subselect_hash_sj_engine::exec + 91.28% 0.02% connection mysqld [.] ExecuteExistsQuery + 90.82% 0.01% connection mysqld [.] TABLE::empty_result_table + 90.81% 0.00% connection mysqld [.] Query_expression::ClearForExecution + 90.80% 0.01% connection mysqld [.] temptable::Handler::delete_all_rows + 90.79% 0.01% connection mysqld [.] JOIN::reset + 90.72% 90.72% connection libc-2.24.so [.] __memset_avx2_erms + 2.32% 0.03% connection mysqld [.] RefIterator<false>::Read + 1.92% 0.12% connection mysqld [.] row_search_mvcc + 1.87% 0.01% connection mysqld [.] handler::ha_index_read_map + 1.65% 0.01% connection mysqld [.] ha_innobase::index_read ``` Check the source code, we find that ``` inline void Table::truncate() { if (!m_all_columns_are_fixed_size) { for (auto element : m_rows) { Row *row = static_cast<Row *>(element); row->~Row(); } } m_rows.clear(); /* Truncate indexes even if `m_indexes_are_enabled` is false. Somebody may * use truncate() before enabling indexes and we don't want an empty m_rows * with some stale data inside the indexes. */ for (auto &entry : m_index_entries) { entry.m_index->truncate(); } } void Hash_unique::truncate() { m_hash_table.clear(); } ``` m_hash_table.clear() is called too many times. Just fix: ``` diff --git a/storage/temptable/src/index.cc b/storage/temptable/src/index.cc index a39e3d4f8..20455a25d 100644 --- a/storage/temptable/src/index.cc +++ b/storage/temptable/src/index.cc @@ -264,7 +264,9 @@ void Hash_unique::erase(const Cursor &target) { m_hash_table.erase(target.hash_iterator()); } -void Hash_unique::truncate() { m_hash_table.clear(); } +void Hash_unique::truncate() { + if (!m_hash_table.empty()) m_hash_table.clear(); +} ``` With this patch, the execution time is reduced to 17s. ``` mysql> SELECT -> count( DISTINCT doudian_open_id ) -> FROM -> ( -> SELECT -> doudian_open_id -> FROM -> ( -> ( -> SELECT -> buyer.doudian_open_id AS doudian_open_id -> FROM -> trade_000001 trade, -> shop_buyer_000001 buyer -> WHERE -> trade.doudian_open_id = buyer.doudian_open_id -> AND trade.seller_id = 6656 -> AND buyer.seller_id = 6656 -> AND buyer.buyer_frist_pay_time >= '2023-01-01 00:00:00' -> AND buyer.buyer_frist_pay_time <= '2023-11-30 23:59:59' AND trade.doudian_open_id IN ( -> SELECT doudian_open_id FROM trade_000001 trade WHERE seller_id = 6656 AND trade.pay_time >= '2022-11-14' -> GROUP BY -> trade.doudian_open_id -> HAVING -> count( 1 ) >= 1 -> ) -> ) -> ) x2 -> ) tmp; +-----------------------------------+ | count( DISTINCT doudian_open_id ) | +-----------------------------------+ | 524288 | +-----------------------------------+ 1 row in set (17.16s) ```