测评OLAP引擎
三个OLAP引擎均为最新稳定版本
测试环境
三台物理机
机器配置:
CPU 40核Intel(R) Xeon(R) CPU E5-2630 v4 @ 2.20GHz
内存 128G
硬盘 7.3T SATA
网卡 10Gb/s
环境搭建:
Apache Doris部署1个FE,3个BE
ClickHouse部署3个节点
DorisDB部署1个FE,3个BE
测试框架
SSB —— 基于TPC-H改进的测试框架,主要测试星型模型下的性能,应用广泛,包括1张事实表,4张维
度表,通过13个SQL查询测试。
测试指标
测试数据
本次测试使用SSB SF = 100数据规模,生成5张表:
数据导入
由于4张维度表数据量不大,导入过程很短,所以维度表导入过程忽略不计,只计算导入lineorder.tbl
三个OLAP都是创建分布式单副本表,Apache Doris和DorisDB采用本地HTTP stream load方式导入,
ClickHouse使用本地文件导入方式
创建宽表lineorder_flat,导入数据
数据准备完成后,磁盘占用情况
查询测试
单表测试结果(ms)
多表Join测试结果(ms)
非性能测试
- Apache Doris在数据刚导入成功后,be compaction消耗比较大
- ClickHouse整体运行CPU、内存消耗小
- Apache Doris与DorisDB运维简单,操作方便
- ClickHouse运维相对困难,配置复杂,创建分布式表复杂
功能对比
附录
建表语句
Apache Doris & DorisDB
REATE TABLE IF NOT EXISTS `lineorder` ( `lo_orderkey` int(11) NOT NULL COMMENT
"", `lo_linenumber` int(11) NOT NULL COMMENT "", `lo_custkey` int(11) NOT NULL
COMMENT "", `lo_partkey` int(11) NOT NULL COMMENT "", `lo_suppkey` int(11) NOT
NULL COMMENT "", `lo_orderdate` date NOT NULL COMMENT "", `lo_orderpriority`
varchar(16) NOT NULL COMMENT "", `lo_shippriority` int(11) NOT NULL COMMENT "",
`lo_quantity` int(11) NOT NULL COMMENT "", `lo_extendedprice` int(11) NOT NULL
COMMENT "", `lo_ordtotalprice` int(11) NOT NULL COMMENT "", `lo_discount`
int(11) NOT NULL COMMENT "", `lo_revenue` int(11) NOT NULL COMMENT "",
`lo_supplycost` int(11) NOT NULL COMMENT "", `lo_tax` int(11) NOT NULL COMMENT
"", `lo_commitdate` date NOT NULL COMMENT "", `lo_shipmode` varchar(11) NOT
NULL COMMENT "" ) ENGINE=OLAP DUPLICATE KEY(`lo_orderkey`) COMMENT "OLAP"
DISTRIBUTED BY HASH(`lo_orderkey`) BUCKETS 30 PROPERTIES ( "replication_num" =
"1", "in_memory" = "false");
CREATE TABLE IF NOT EXISTS `customer` ( `c_custkey` int(11) NOT NULL COMMENT
"", `c_name` varchar(26) NOT NULL COMMENT "", `c_address` varchar(41) NOT
NULL COMMENT "", `c_city` varchar(11) NOT NULL COMMENT "", `c_nation`
varchar(16) NOT NULL COMMENT "", `c_region` varchar(13) NOT NULL COMMENT "",
`c_phone` varchar(16) NOT NULL COMMENT "", `c_mktsegment` varchar(11) NOT NULL
COMMENT "" ) ENGINE=OLAP DUPLICATE KEY(`c_custkey`) COMMENT "OLAP" DISTRIBUTED BY
HASH(`c_custkey`) BUCKETS 10 PROPERTIES ( "replication_num" = "1", "in_memory" =
"false");
CREATE TABLE IF NOT EXISTS `dates` ( `d_datekey` int(11) NOT NULL COMMENT "",
`d_date` varchar(20) NOT NULL COMMENT "", `d_dayofweek` varchar(10) NOT NULL
COMMENT "", `d_month` varchar(11) NOT NULL COMMENT "", `d_year` int(11) NOT
NULL COMMENT "", `d_yearmonthnum` int(11) NOT NULL COMMENT "", `d_yearmonth`
varchar(9) NOT NULL COMMENT "", `d_daynuminweek` int(11) NOT NULL COMMENT "",
`d_daynuminmonth` int(11) NOT NULL COMMENT "", `d_daynuminyear` int(11) NOT
NULL COMMENT "", `d_monthnuminyear` int(11) NOT NULL COMMENT "",
`d_weeknuminyear` int(11) NOT NULL COMMENT "", `d_sellingseason` varchar(14)
NOT NULL COMMENT "", `d_lastdayinweekfl` int(11) NOT NULL COMMENT "",
`d_lastdayinmonthfl` int(11) NOT NULL COMMENT "", `d_holidayfl` int(11) NOT
NULL COMMENT "", `d_weekdayfl` int(11) NOT NULL COMMENT "" ) ENGINE=OLAP
DUPLICATE KEY(`d_datekey`) COMMENT "OLAP" DISTRIBUTED BY HASH(`d_datekey`)
BUCKETS 10 PROPERTIES ( "replication_num" = "1", "in_memory" = "false");
CREATE TABLE IF NOT EXISTS `supplier` ( `s_suppkey` int(11) NOT NULL COMMENT
"", `s_name` varchar(26) NOT NULL COMMENT "", `s_address` varchar(26) NOT
NULL COMMENT "", `s_city` varchar(11) NOT NULL COMMENT "", `s_nation`
varchar(16) NOT NULL COMMENT "", `s_region` varchar(13) NOT NULL COMMENT "",
`s_phone` varchar(16) NOT NULL COMMENT "" ) ENGINE=OLAP DUPLICATE
KEY(`s_suppkey`) COMMENT "OLAP" DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 10
PROPERTIES ( "replication_num" = "1", "in_memory" = "false");
CREATE TABLE IF NOT EXISTS `part` ( `p_partkey` int(11) NOT NULL COMMENT "",
`p_name` varchar(23) NOT NULL COMMENT "", `p_mfgr` varchar(7) NOT NULL COMMENT
"", `p_category` varchar(8) NOT NULL COMMENT "", `p_brand` varchar(10) NOT
NULL COMMENT "", `p_color` varchar(12) NOT NULL COMMENT "", `p_type`
varchar(26) NOT NULL COMMENT "", `p_size` int(11) NOT NULL COMMENT "",
`p_container` varchar(11) NOT NULL COMMENT "" ) ENGINE=OLAP DUPLICATE
KEY(`p_partkey`) COMMENT "OLAP" DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 10
PROPERTIES ( "replication_num" = "1", "in_memory" = "false");
ClickHouse
CREATE TABLE IF NOT EXISTS `lineorder_flat` ( `LO_ORDERKEY` int(11) NOT NULL
COMMENT "", `LO_ORDERDATE` date NOT NULL COMMENT "", `LO_LINENUMBER`
tinyint(4) NOT NULL COMMENT "", `LO_CUSTKEY` int(11) NOT NULL COMMENT "",
`LO_PARTKEY` int(11) NOT NULL COMMENT "", `LO_SUPPKEY` int(11) NOT NULL COMMENT
"", `LO_ORDERPRIORITY` varchar(100) NOT NULL COMMENT "", `LO_SHIPPRIORITY`
tinyint(4) NOT NULL COMMENT "", `LO_QUANTITY` tinyint(4) NOT NULL COMMENT "",
`LO_EXTENDEDPRICE` int(11) NOT NULL COMMENT "", `LO_ORDTOTALPRICE` int(11) NOT
NULL COMMENT "", `LO_DISCOUNT` tinyint(4) NOT NULL COMMENT "", `LO_REVENUE`
int(11) NOT NULL COMMENT "", `LO_SUPPLYCOST` int(11) NOT NULL COMMENT "",
`LO_TAX` tinyint(4) NOT NULL COMMENT "", `LO_COMMITDATE` date NOT NULL COMMENT
"", `LO_SHIPMODE` varchar(100) NOT NULL COMMENT "", `C_NAME` varchar(100) NOT
NULL COMMENT "", `C_ADDRESS` varchar(100) NOT NULL COMMENT "", `C_CITY`
varchar(100) NOT NULL COMMENT "", `C_NATION` varchar(100) NOT NULL COMMENT "",
`C_REGION` varchar(100) NOT NULL COMMENT "", `C_PHONE` varchar(100) NOT NULL
COMMENT "", `C_MKTSEGMENT` varchar(100) NOT NULL COMMENT "", `S_NAME`
varchar(100) NOT NULL COMMENT "", `S_ADDRESS` varchar(100) NOT NULL COMMENT "",
`S_CITY` varchar(100) NOT NULL COMMENT "", `S_NATION` varchar(100) NOT NULL
COMMENT "", `S_REGION` varchar(100) NOT NULL COMMENT "", `S_PHONE`
varchar(100) NOT NULL COMMENT "", `P_NAME` varchar(100) NOT NULL COMMENT "",
`P_MFGR` varchar(100) NOT NULL COMMENT "", `P_CATEGORY` varchar(100) NOT NULL
COMMENT "", `P_BRAND` varchar(100) NOT NULL COMMENT "", `P_COLOR`
varchar(100) NOT NULL COMMENT "", `P_TYPE` varchar(100) NOT NULL COMMENT "",
`P_SIZE` tinyint(4) NOT NULL COMMENT "", `P_CONTAINER` varchar(100) NOT NULL
COMMENT "" ) ENGINE=OLAP DUPLICATE KEY(`LO_ORDERKEY`) COMMENT "OLAP" PARTITION BY
RANGE(`LO_ORDERDATE`) (PARTITION p1 VALUES [('0000-01-01'), ('1993-01-01')),
PARTITION p2 VALUES [('1993-01-01'), ('1994-01-01')), PARTITION p3 VALUES
[('1994-01-01'), ('1995-01-01')), PARTITION p4 VALUES [('1995-01-01'), ('1996-01-
01')), PARTITION p5 VALUES [('1996-01-01'), ('1997-01-01')), PARTITION p6 VALUES
[('1997-01-01'), ('1998-01-01')), PARTITION p7 VALUES [('1998-01-01'), ('1999-01-
01'))) DISTRIBUTED BY HASH(`LO_ORDERKEY`) BUCKETS 60 PROPERTIES (
"replication_num" = "1", "in_memory" = "false");
clickhouse
REATE TABLE IF NOT EXISTS `lineorder` ( `lo_orderkey` int(11) NOT NULL COMMENT
"", `lo_linenumber` int(11) NOT NULL COMMENT "", `lo_custkey` int(11) NOT NULL
COMMENT "", `lo_partkey` int(11) NOT NULL COMMENT "", `lo_suppkey` int(11) NOT
NULL COMMENT "", `lo_orderdate` date NOT NULL COMMENT "", `lo_orderpriority`
varchar(16) NOT NULL COMMENT "", `lo_shippriority` int(11) NOT NULL COMMENT "",
`lo_quantity` int(11) NOT NULL COMMENT "", `lo_extendedprice` int(11) NOT NULL
COMMENT "", `lo_ordtotalprice` int(11) NOT NULL COMMENT "", `lo_discount`
int(11) NOT NULL COMMENT "", `lo_revenue` int(11) NOT NULL COMMENT "",
`lo_supplycost` int(11) NOT NULL COMMENT "", `lo_tax` int(11) NOT NULL COMMENT
"", `lo_commitdate` date NOT NULL COMMENT "", `lo_shipmode` varchar(11) NOT
NULL COMMENT "" ) ENGINE=OLAP DUPLICATE KEY(`lo_orderkey`) COMMENT "OLAP"
DISTRIBUTED BY HASH(`lo_orderkey`) BUCKETS 30 PROPERTIES ( "replication_num" =
"1", "in_memory" = "false");
CREATE TABLE IF NOT EXISTS `customer` ( `c_custkey` int(11) NOT NULL COMMENT
"", `c_name` varchar(26) NOT NULL COMMENT "", `c_address` varchar(41) NOT
NULL COMMENT "", `c_city` varchar(11) NOT NULL COMMENT "", `c_nation`
varchar(16) NOT NULL COMMENT "", `c_region` varchar(13) NOT NULL COMMENT "",
`c_phone` varchar(16) NOT NULL COMMENT "", `c_mktsegment` varchar(11) NOT NULL
COMMENT "" ) ENGINE=OLAP DUPLICATE KEY(`c_custkey`) COMMENT "OLAP" DISTRIBUTED BY
HASH(`c_custkey`) BUCKETS 10 PROPERTIES ( "replication_num" = "1", "in_memory" =
"false");
CREATE TABLE IF NOT EXISTS `dates` ( `d_datekey` int(11) NOT NULL COMMENT "",
`d_date` varchar(20) NOT NULL COMMENT "", `d_dayofweek` varchar(10) NOT NULL
COMMENT "", `d_month` varchar(11) NOT NULL COMMENT "", `d_year` int(11) NOT
NULL COMMENT "", `d_yearmonthnum` int(11) NOT NULL COMMENT "", `d_yearmonth`
varchar(9) NOT NULL COMMENT "", `d_daynuminweek` int(11) NOT NULL COMMENT "",
`d_daynuminmonth` int(11) NOT NULL COMMENT "", `d_daynuminyear` int(11) NOT
NULL COMMENT "", `d_monthnuminyear` int(11) NOT NULL COMMENT "",
`d_weeknuminyear` int(11) NOT NULL COMMENT "", `d_sellingseason` varchar(14)
NOT NULL COMMENT "", `d_lastdayinweekfl` int(11) NOT NULL COMMENT "",
`d_lastdayinmonthfl` int(11) NOT NULL COMMENT "", `d_holidayfl` int(11) NOT
NULL COMMENT "", `d_weekdayfl` int(11) NOT NULL COMMENT "" ) ENGINE=OLAP
DUPLICATE KEY(`d_datekey`) COMMENT "OLAP" DISTRIBUTED BY HASH(`d_datekey`)
BUCKETS 10 PROPERTIES ( "replication_num" = "1", "in_memory" = "false");
CREATE TABLE IF NOT EXISTS `supplier` ( `s_suppkey` int(11) NOT NULL COMMENT
"", `s_name` varchar(26) NOT NULL COMMENT "", `s_address` varchar(26) NOT
NULL COMMENT "", `s_city` varchar(11) NOT NULL COMMENT "", `s_nation`
varchar(16) NOT NULL COMMENT "", `s_region` varchar(13) NOT NULL COMMENT "",
`s_phone` varchar(16) NOT NULL COMMENT "" ) ENGINE=OLAP DUPLICATE
KEY(`s_suppkey`) COMMENT "OLAP" DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 10
PROPERTIES ( "replication_num" = "1", "in_memory" = "false");
CREATE TABLE IF NOT EXISTS `part` ( `p_partkey` int(11) NOT NULL COMMENT "",
`p_name` varchar(23) NOT NULL COMMENT "", `p_mfgr` varchar(7) NOT NULL COMMENT
"", `p_category` varchar(8) NOT NULL COMMENT "", `p_brand` varchar(10) NOT
NULL COMMENT "", `p_color` varchar(12) NOT NULL COMMENT "", `p_type`
varchar(26) NOT NULL COMMENT "", `p_size` int(11) NOT NULL COMMENT "",
`p_container` varchar(11) NOT NULL COMMENT "" ) ENGINE=OLAP DUPLICATE
KEY(`p_partkey`) COMMENT "OLAP" DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 10
PROPERTIES ( "replication_num" = "1", "in_memory" = "false");
ClickHouse
CREATE TABLE IF NOT EXISTS `lineorder_flat` ( `LO_ORDERKEY` int(11) NOT NULL
COMMENT "", `LO_ORDERDATE` date NOT NULL COMMENT "", `LO_LINENUMBER`
tinyint(4) NOT NULL COMMENT "", `LO_CUSTKEY` int(11) NOT NULL COMMENT "",
`LO_PARTKEY` int(11) NOT NULL COMMENT "", `LO_SUPPKEY` int(11) NOT NULL COMMENT
"", `LO_ORDERPRIORITY` varchar(100) NOT NULL COMMENT "", `LO_SHIPPRIORITY`
tinyint(4) NOT NULL COMMENT "", `LO_QUANTITY` tinyint(4) NOT NULL COMMENT "",
`LO_EXTENDEDPRICE` int(11) NOT NULL COMMENT "", `LO_ORDTOTALPRICE` int(11) NOT
NULL COMMENT "", `LO_DISCOUNT` tinyint(4) NOT NULL COMMENT "", `LO_REVENUE`
int(11) NOT NULL COMMENT "", `LO_SUPPLYCOST` int(11) NOT NULL COMMENT "",
`LO_TAX` tinyint(4) NOT NULL COMMENT "", `LO_COMMITDATE` date NOT NULL COMMENT
"", `LO_SHIPMODE` varchar(100) NOT NULL COMMENT "", `C_NAME` varchar(100) NOT
NULL COMMENT "", `C_ADDRESS` varchar(100) NOT NULL COMMENT "", `C_CITY`
varchar(100) NOT NULL COMMENT "", `C_NATION` varchar(100) NOT NULL COMMENT "",
`C_REGION` varchar(100) NOT NULL COMMENT "", `C_PHONE` varchar(100) NOT NULL
COMMENT "", `C_MKTSEGMENT` varchar(100) NOT NULL COMMENT "", `S_NAME`
varchar(100) NOT NULL COMMENT "", `S_ADDRESS` varchar(100) NOT NULL COMMENT "",
`S_CITY` varchar(100) NOT NULL COMMENT "", `S_NATION` varchar(100) NOT NULL
COMMENT "", `S_REGION` varchar(100) NOT NULL COMMENT "", `S_PHONE`
varchar(100) NOT NULL COMMENT "", `P_NAME` varchar(100) NOT NULL COMMENT "",
`P_MFGR` varchar(100) NOT NULL COMMENT "", `P_CATEGORY` varchar(100) NOT NULL
COMMENT "", `P_BRAND` varchar(100) NOT NULL COMMENT "", `P_COLOR`
varchar(100) NOT NULL COMMENT "", `P_TYPE` varchar(100) NOT NULL COMMENT "",
`P_SIZE` tinyint(4) NOT NULL COMMENT "", `P_CONTAINER` varchar(100) NOT NULL
COMMENT "" ) ENGINE=OLAP DUPLICATE KEY(`LO_ORDERKEY`) COMMENT "OLAP" PARTITION BY
RANGE(`LO_ORDERDATE`) (PARTITION p1 VALUES [('0000-01-01'), ('1993-01-01')),
PARTITION p2 VALUES [('1993-01-01'), ('1994-01-01')), PARTITION p3 VALUES
[('1994-01-01'), ('1995-01-01')), PARTITION p4 VALUES [('1995-01-01'), ('1996-01-
01')), PARTITION p5 VALUES [('1996-01-01'), ('1997-01-01')), PARTITION p6 VALUES
[('1997-01-01'), ('1998-01-01')), PARTITION p7 VALUES [('1998-01-01'), ('1999-01-
01'))) DISTRIBUTED BY HASH(`LO_ORDERKEY`) BUCKETS 60 PROPERTIES (
"replication_num" = "1", "in_memory" = "false");