SELECT table, formatReadableSize(sum(data_compressed_bytes)) AS tc, formatReadableSize(sum(data_uncompressed_bytes)) AS tu, sum(data_compressed_bytes) / sum(data_uncompressed_bytes) AS ratio FROMsystem.columns WHERE (database = currentDatabase()) AND (tableIN ('small_table_local', 'big_table_local')) GROUPBYtable ORDERBYtableASC
SELECT a.UID,B.UID from dwh.small_tablea LEFT JOIN dwh.big_tableb on a.UID = b.UID
这个查询在ClickHouse中要跑近300秒
1 2 3 4 5 6 7 8 9 10 11 12 13
#time clickhouse-client --time --progress --query=" SELECT a.UID, B.UID FROM dwh.small_table a LEFT JOIN dwh.big_table b ON a.UID = b.UID " > /dev/null 293.769
real 4m53.798s user 0m0.574s sys 0m0.225s
而在TIDB只需要20秒(节点数和配置比CH略好, 数据略多于CH, 未使用TIFlash)
1 2 3 4 5 6 7 8 9 10 11 12 13
# time mysql -uroot -hxx.xx.xx -P4000 -p dwh -e " SELECT a.UID,B.UID FROM dwh.small_tablea LEFTJOIN dwh.big_tableb ON a.UID = b.UID; "> /dev/null Enterpassword:
我们知道在常见的关系型数据库如Oralce、MySQL中, LEFT JOIN和RIGTH JOIN是可以等价改写的, 那么我改成RIGHT JOIN不就”把小表放在右边”了吗, 于是SQL改写为
1
SELECT a.UID,B.UID from dwh.big_tableb RIGHT JOIN dwh.small_tablea on a.UID = b.UID
实测
1 2 3 4 5 6 7 8 9 10 11 12 13
#time clickhouse-client --time --progress --query=" SELECT a.UID, B.UID FROM dwh.big_table b RIGHT JOIN dwh.small_table a ON a.UID = b.UIDT " > /dev/null 19.588
real 0m19.609s user 0m0.742s sys 0m0.293s
没想到还真好使… 难道CH优化器这么弱?
谨慎起见, 我比对了一下结果, 简单count一下吧
LEFT JOIN
1 2 3 4 5 6 7 8 9 10 11 12 13 14
#time clickhouse-client --time --progress --query=" SELECT COUNT(*) FROM dwh.small_table a LEFT JOIN dwh.big_table b ON a.UID = b.UID " 6042735 --count 917.560 --时间
real 15m17.580s user 0m0.253s sys 0m0.489s
RIGHT JOIN
1 2 3 4 5 6 7 8 9 10 11 12 13 14
#time clickhouse-client --time --progress --query=" SELECT COUNT(*) FROM dwh.big_table b RIGHT JOIN dwh.small_table a ON a.UID = b.UID " 6897617 --count 11.655 --时间
ch-node-05default@localhost:9000 [dwh] :) SELECT :-] _shard_num, :-] count(*) :-] FROM :-] ( :-] SELECT :-] _shard_num, :-] a.* :-] FROM dwh.t1 AS a :-] ) :-] GROUP BY _shard_num :-] WITH ROLLUP;
SELECT _shard_num, count(*) FROM ( SELECT _shard_num, a.* FROM dwh.t1 AS a ) GROUP BY _shard_num WITH ROLLUP
ch-node-05default@localhost:9000 [dwh] :) SELECT :-] _shard_num, :-] count(*) :-] FROM :-] ( :-] SELECT :-] _shard_num, :-] a.* :-] FROM dwh.t2 AS a :-] ) :-] GROUP BY _shard_num :-] WITH ROLLUP;
SELECT _shard_num, count(*) FROM ( SELECT _shard_num, a.* FROM dwh.t2 AS a ) GROUP BY _shard_num WITH ROLLUP
ch-node-05default@localhost:9000 [dwh] :) SELECT :-] a.I_ID, :-] b.I_ID :-] FROM dwh.t2 AS a :-] LEFT JOIN dwh.t1 AS b ON a.I_ID = b.I_ID :-] ORDER BY a.I_ID ASC;
SELECT a.I_ID, b.I_ID FROM dwh.t2 AS a LEFT JOIN dwh.t1 AS b ON a.I_ID = b.I_ID ORDER BY a.I_ID ASC
ch-node-05default@localhost:9000 [dwh] :) SELECT :-] a.I_ID, :-] b.I_ID :-] FROM dwh.t1 AS b :-] RIGHT JOIN dwh.t2 AS a ON a.I_ID = b.I_ID :-] ORDER BY a.I_ID ASC;
SELECT a.I_ID, b.I_ID FROM dwh.t1 AS b RIGHT JOIN dwh.t2 AS a ON a.I_ID = b.I_ID ORDER BY toUInt32(a.I_ID) ASC
#clickhouse-client --time --progress --query="SELECT count(*) from dwh.big_table b RIGHT JOIN dwh.small_table_total a on a.UID = b.UID" → Progress: 0.00 rows, 0.00 B (0.00 rows/s., 0.00 B/s.) Received exception from server (version 20.3.5): Code: 60. DB::Exception: Received from localhost:9000. DB::Exception: Received from bj2-ch-node-04:9000. DB::Exception: Table dwh.small_table_total doesn't exist.. 0.107
测试查询速度
1 2 3 4 5 6 7 8 9 10
#clickhouse-client --time --progress --query=" SELECT COUNT(*) FROM dwh.big_table_total b RIGHTJOIN dwh.small_table_total a ON a.UID = b.UID " 6042735 --count 7.262 --用时
好家伙, 数据准确, 而且比分片了还快
难道只能用本地表?
分布式表要想RIGHT JOIN返回正确结果, 只能改写SQL
原始语句
1 2 3 4 5 6
SELECT a.UID, b.UID FROM dwh.small_table a LEFT JOIN dwh.big_tableb ON a.UID = b.UID
改写为INNER JOIN, 但是没有改表顺序(性能差)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
SELECT a.id, b.uid FROM dwh.small_table a GLOBAL INNER JOIN dwh.big_tableb ON a.UID = b.UID UNION ALL SELECT a.UID, NULL FROM dwh.small_table a WHERE a.UID GLOBAL NOT IN (SELECT UID FROM dwh.big_table)
SELECT a.UID, b.UID FROM dwh.small_table AS a GLOBALINNERJOIN dwh.big_table AS b ON a.UID = b.UID UNIONALL SELECT a.UID, NULL FROM dwh.small_table AS a WHERE a.UID GLOBALNOTIN ( SELECT UID FROM dwh.big_table )
↑ Progress: 220.53 million rows, 29.82 GB (51.24 million rows/s., 6.93 GB/s.) ████████████████████████████████████▋ 20%Received exceptionfromserver (version20.3.5): Code: 241. DB::Exception: Received from localhost:9000. DB::Exception: Memorylimit (forquery) exceeded: would use50.00 GiB (attempt toallocatechunkof4249200bytes), maximum: 50.00 GiB: (while reading column CH_BILL_USER_TELEPHONE): (while reading from part /data/clickhouse/ch_9000/data/dwh/big_table_local/201910_0_5_1/ from mark 216with max_rows_to_read = 8192): Code: 241, e.displayText() = DB::Exception: Memorylimit (forquery) exceeded: would use50.00 GiB (attempt toallocatechunkof4227680bytes), maximum: 50.00 GiB: (while reading column CH_XXX_NAME): (while reading from part /data/clickhouse/ch_9000/data/dwh/big_table_local/202001_6_11_1/ from mark 240with max_rows_to_read = 8192) (version20.3.5.21 (official build)): Code: 241, e.displayText() = DB::Exception: Memorylimit (forquery) exceeded: would use50.00 GiB (attempt toallocatechunkof5211280bytes), maximum: 50.00 GiB: (avg_value_size_hint = 66, avg_chars_size = 69.6, limit = 8192): (while reading column CH_BROKER_NAME): (while reading from part /data/clickhouse/ch_9000/data/dwh/big_table_local/202007_6_11_1/ from mark 24with max_rows_to_read = 8192) (version20.3.5.21 (official build)): Code: 241, e.displayText() = DB::Exception: Memorylimit (forquery) exceeded: would use50.00 GiB (attempt toallocatechunkof4572064bytes), maximum: 50.00 GiB: (avg_value_size_hint = 66.00048828125, avg_chars_size = 69.6005859375, limit = 8192): (while reading column CH_XX_NAME): (while reading from part /data/clickhouse/ch_9000/data/dwh/big_table_local/201805_2_2_0/ from mark 24with max_rows_to_read = 8192) (version20.3.5.21 (official build)): While executing CreatingSetsTransform.
0rowsin set. Elapsed: 4.404 sec. Processed 220.53 million rows, 29.82 GB (50.07 million rows/s., 6.77 GB/s.)
SELECT a.UID, b.UID FROM dwh.small_table AS a INNERJOIN dwh.big_table AS b ON a.UID = b.UID UNIONALL SELECT a.UID, NULL FROM dwh.small_table AS a WHERE a.UID GLOBALNOTIN ( SELECT UID FROM dwh.big_table )
↑ Progress: 1.91 billion rows, 105.59 GB (6.36 million rows/s., 352.30 MB/s.) ██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ 90%Received exceptionfromserver (version20.3.5): Code: 241. DB::Exception: Received from localhost:9000. DB::Exception: Received from clickhouse-node-01:9000. DB::Exception: Memorylimit (total) exceeded: would use50.10 GiB (attempt toallocatechunkof133829856bytes), maximum: 50.00 GiB: While executing CreatingSetsTransform.
0rowsin set. Elapsed: 299.809 sec. Processed 1.91 billion rows, 105.59 GB (6.35 million rows/s., 352.18 MB/s.)
改写表顺序, 让小表在”右边”
1 2 3 4 5 6 7 8 9 10
SELECT a.UID FROM dwh.big_table b GLOBALINNERJOIN dwh.small_table a ON a.UID = b.UID UNIONALL SELECT a.UID,nullFROM dwh.small_table a WHERE a.UID GLOBALNOTIN ( SELECT UID FROM dwh.big_table WHERE UID GLOBALIN (SELECTidFROM dwh.small_table) )
实测
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
time clickhouse-client --time --progress --query=" SELECT a.UID,b.UID FROM dwh.big_table b GLOBALINNERJOIN dwh.small_table a on a.UID = b.UID UNIONALL SELECT a.UID,nullFROM dwh.small_table a WHERE a.UID GLOBALNOTIN ( SELECT UID FROM dwh.big_table WHERE UID GLOBALIN (SELECT UID FROM dwh.small_table) )" >/dev/null 21.142
real 0m21.164s user 0m1.133s sys 0m0.378s
看一下行数对不对
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
time clickhouse-client --time --progress --query=" SELECT sum(cnt) FROM ( SELECT count(*) cnt FROM dwh.big_table b GLOBALINNERJOIN dwh.small_table a on a.UID = b.UID UNIONALL SELECT count(*) cnt FROM dwh.small_table a WHERE a.UID GLOBALNOTIN ( SELECT UID FROM dwh.big_table WHERE UID GLOBALIN (SELECT UID FROM dwh.small_table) ))" 6042735 --count 12.525 --用时