Skip to content

Commit 548ad46

Browse files
authored
Merge pull request #302 from Mytherin/duckdb-v1.2.0
2 parents bcf821e + 9da39ad commit 548ad46

15 files changed

+250
-280
lines changed

duckdb-memory/benchmark.sh

-1
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,3 @@ cat log.txt | grep -P '^\d|Killed|Segmentation' | sed -r -e 's/^.*(Killed|Segmen
1919
awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
2020

2121
/usr/bin/time -v ./memory.py
22-

duckdb-parquet/benchmark.sh

+16-7
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,30 @@
11
#!/bin/bash
22

33
# Install
4-
54
sudo apt-get update
6-
sudo apt-get install -y python3-pip
7-
pip install --break-system-packages duckdb==1.1.3 psutil
5+
sudo apt-get install ninja-build cmake build-essential make ccache pip clang -y
6+
7+
export CC=clang
8+
export CXX=clang++
9+
git clone https://github.com/duckdb/duckdb
10+
cd duckdb
11+
git checkout v1.2-histrionicus
12+
GEN=ninja NATIVE_ARCH=1 LTO=thin make
13+
export PATH="$PATH:`pwd`/build/release/"
14+
cd ..
815

916
# Load the data
1017
seq 0 99 | xargs -P100 -I{} bash -c 'wget --no-verbose --continue https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{}.parquet'
1118

12-
./load.py
19+
time duckdb hits.db -f create.sql
1320

1421
# Run the queries
1522

1623
./run.sh 2>&1 | tee log.txt
1724

18-
wc -c my-db.duckdb
25+
wc -c hits.db
1926

20-
cat log.txt | grep -P '^\d|Killed|Segmentation' | sed -r -e 's/^.*(Killed|Segmentation).*$/null\nnull\nnull/' |
21-
awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
27+
cat log.txt |
28+
grep -P '^\d|Killed|Segmentation|^Run Time \(s\): real' |
29+
sed -r -e 's/^.*(Killed|Segmentation).*$/null\nnull\nnull/; s/^Run Time \(s\): real\s*([0-9.]+).*$/\1/' |
30+
awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'

duckdb-parquet/create.sql

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
CREATE VIEW hits AS
22
SELECT *
3-
REPLACE
4-
(epoch_ms(EventTime * 1000) AS EventTime,
5-
DATE '1970-01-01' + INTERVAL (EventDate) DAYS AS EventDate)
6-
FROM read_parquet('hits_*.parquet', binary_as_string=True);
3+
REPLACE (make_date(EventDate) AS EventDate)
4+
FROM read_parquet('hits_*.parquet', binary_as_string=True);
5+
6+
CREATE MACRO toDateTime(t) AS epoch_ms(t * 1000);

duckdb-parquet/load.py

-14
This file was deleted.

duckdb-parquet/queries.sql

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase
1616
SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10;
1717
SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
1818
SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;
19-
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
19+
SELECT UserID, extract(minute FROM toDateTime(EventTime)) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
2020
SELECT UserID FROM hits WHERE UserID = 435090932899640449;
2121
SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%';
2222
SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
@@ -40,4 +40,4 @@ SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >
4040
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
4141
SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100;
4242
SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000;
43-
SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000;
43+
SELECT DATE_TRUNC('minute', toDateTime(EventTime)) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', toDateTime(EventTime)) ORDER BY DATE_TRUNC('minute', toDateTime(EventTime)) LIMIT 10 OFFSET 1000;

duckdb-parquet/query.py

-17
This file was deleted.

duckdb-parquet/results/c6a.4xlarge.json

+44-44
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"system": "DuckDB (Parquet, partitioned)",
3-
"date": "2024-11-27",
3+
"date": "2025-02-05",
44
"machine": "c6a.4xlarge, 500gb gp2",
55
"cluster_size": 1,
66
"comment": "",
@@ -11,48 +11,48 @@
1111
"data_size": 14737666736,
1212

1313
"result": [
14-
[0.11052128000000039,0.07510095800000727,0.07482720299998391],
15-
[0.12390973099999769,0.08799121000001264,0.08659421100000486],
16-
[0.1802913619999913,0.11165366699998458,0.11256087000001003],
17-
[0.44857502400000726,0.11868003600000065,0.11416359200001125],
18-
[1.2294751780000013,0.42031310699999835,0.41501899200000025],
19-
[1.0619799039999975,0.704478875999996,0.6945510980000051],
20-
[0.463516052000017,0.4205021790000103,0.42350347199999305],
21-
[0.14255578700002047,0.09078143099998215,0.091366378999993],
22-
[0.9319725910000045,0.5142498320000186,0.5172727039999927],
23-
[1.3762876150000238,0.7246325790000014,0.7255065909999985],
24-
[0.5583005390000153,0.19166810400000145,0.1881575449999957],
25-
[1.1524824470000112,0.22536062500000753,0.22139205199999878],
26-
[1.4437344370000176,0.5995674480000162,0.5936260590000018],
27-
[2.805760178000014,0.9231901580000113,0.9175548540000023],
28-
[1.1569440859999816,0.651196504000012,0.6422786660000099],
29-
[0.6690660820000005,0.47228665700001216,0.4744386749999876],
30-
[2.791790689999999,1.121874450000007,1.1247018900000114],
31-
[2.7981470900000147,1.125944488000016,1.141634646],
32-
[5.138499918000008,1.9534744589999775,1.9856906880000054],
33-
[0.20532803100002184,0.10882481699999857,0.11244283699997482],
34-
[10.62995572700001,1.844938342000006,1.8348558930000252],
35-
[11.701504103000019,1.7005332410000165,1.7053109770000106],
36-
[22.958297070000015,3.5526050710000163,3.555595224000001],
37-
[58.88756602899997,10.639712609000014,10.690652277000027],
38-
[2.99689328300002,0.4794005240000274,0.483271789000014],
39-
[0.9076006049999705,0.36497371699999803,0.35227883999999676],
40-
[2.994342438999979,0.4895540190000247,0.479844522999997],
41-
[9.353536024999983,1.4645006279999961,1.4759320379999963],
42-
[10.568858570999964,10.158934629999976,10.11379384899999],
43-
[4.335202347000006,4.255150033000007,4.283988227999998],
44-
[2.25947083799997,0.7053096309999773,0.7094401290000292],
45-
[5.932542424000019,0.8128257209999674,0.8173750340000083],
46-
[5.391661251000016,2.37234533000003,2.3208716000000322],
47-
[9.879454598000052,2.6484584210000435,2.640947945999983],
48-
[9.876968317999967,2.6066815840000004,2.639813798999967],
49-
[0.7822432080000112,0.6648381679999602,0.6626834680000115],
50-
[0.24420177100000728,0.14329177199999776,0.15229572799995594],
51-
[0.15579374700001836,0.11827062200001137,0.10689886399995885],
52-
[0.1992192829999908,0.10196714500000326,0.10160259900004576],
53-
[0.4412357339999744,0.2678184920000035,0.26373995500000547],
54-
[0.12147571700000981,0.08117302400000881,0.08039329399997541],
55-
[0.1181243000000336,0.08039320299997144,0.07783418700000766],
56-
[0.11289640699999381,0.07863085899998623,0.07738770099996373]
14+
[0.164,0.085,0.082],
15+
[0.119,0.060,0.061],
16+
[0.192,0.088,0.086],
17+
[0.359,0.081,0.079],
18+
[1.178,0.353,0.363],
19+
[0.828,0.460,0.457],
20+
[0.113,0.061,0.061],
21+
[0.127,0.063,0.060],
22+
[0.717,0.465,0.465],
23+
[1.059,0.596,0.578],
24+
[0.443,0.151,0.148],
25+
[1.016,0.185,0.182],
26+
[1.136,0.490,0.476],
27+
[2.430,0.919,0.880],
28+
[0.930,0.540,0.535],
29+
[0.564,0.420,0.420],
30+
[2.350,1.021,1.021],
31+
[2.122,0.791,0.789],
32+
[4.587,1.930,1.950],
33+
[0.251,0.074,0.073],
34+
[9.957,1.197,1.194],
35+
[10.999,0.684,0.668],
36+
[21.527,1.392,1.410],
37+
[33.496,5.369,5.336],
38+
[0.196,0.094,0.094],
39+
[0.975,0.248,0.253],
40+
[0.178,0.093,0.092],
41+
[9.984,0.896,0.889],
42+
[9.955,9.601,9.455],
43+
[0.160,0.073,0.074],
44+
[2.248,0.578,0.571],
45+
[5.922,0.682,0.676],
46+
[5.447,2.040,2.056],
47+
[9.888,2.372,2.386],
48+
[9.854,2.403,2.458],
49+
[0.746,0.587,0.590],
50+
[0.209,0.111,0.111],
51+
[0.155,0.088,0.087],
52+
[0.150,0.057,0.055],
53+
[0.394,0.219,0.217],
54+
[0.115,0.053,0.050],
55+
[0.116,0.056,0.055],
56+
[0.101,0.051,0.047]
5757
]
5858
}

duckdb-parquet/results/c6a.metal.json

+44-44
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"system": "DuckDB (Parquet, partitioned)",
3-
"date": "2024-11-27",
3+
"date": "2025-02-05",
44
"machine": "c6a.metal, 500gb gp2",
55
"cluster_size": 1,
66
"comment": "",
@@ -11,48 +11,48 @@
1111
"data_size": 14737666736,
1212

1313
"result": [
14-
[0.22064165999836405,0.10886333600137732,0.10035635300300783],
15-
[0.2090159239996865,0.10079622400007793,0.08587361900208634],
16-
[0.24158671799887088,0.10498254399863072,0.11356378700293135],
17-
[0.39156127000023844,0.1437904560007155,0.11517444600031013],
18-
[0.9492011050024303,0.23534582900174428,0.24136485300186905],
19-
[0.9057871529985277,0.30914358100199024,0.23769009399984498],
20-
[0.24534914900141302,0.1509247879985196,0.162418247997266],
21-
[0.23070556400125497,0.11584803399819066,0.11269584900219343],
22-
[0.7073371700025746,0.26606066500244197,0.22921901900190278],
23-
[1.0556358089997957,0.295529817998613,0.26568887699977495],
24-
[0.4892407519982953,0.1328769840001769,0.16416157899948303],
25-
[0.8916413730003114,0.16927299000235507,0.15134500799831585],
26-
[1.1896540300003835,0.28654627699870616,0.27039848700223956],
27-
[2.5535861489988747,0.4687831190021825,0.42421876300068107],
28-
[0.9975665820020367,0.362422553000215,0.2820111139990331],
29-
[0.5269063039995672,0.39250073700168286,0.29078964099971927],
30-
[2.2770834360017034,0.8704268709989265,0.7373391680011991],
31-
[2.285241853998741,0.9774307129991939,0.7003227230015909],
32-
[4.292322358000092,0.9742372030013939,0.6998918900026183],
33-
[0.2506510780003737,0.11718380399906891,0.1293006730011257],
34-
[9.77101984500041,0.527151017999131,0.431143694997445],
35-
[10.999202312999842,0.3956347899984394,0.3579404030024307],
36-
[21.5540906310016,0.6871144640026614,0.5319933459977619],
37-
[55.29203715499898,1.9404848179983674,1.7484993639991444],
38-
[2.612326053000288,0.1912991279968992,0.1776238040001772],
39-
[0.918034816000727,0.19911728600345668,0.17915523299961933],
40-
[2.8879784470009326,0.21927593299915316,0.20239478800067445],
41-
[9.361416278999968,0.46229383399986546,0.409067109998432],
42-
[8.779426084001898,2.899777353999525,3.370570749997569],
43-
[0.7902931309981795,0.5913719390009646,0.5893235910007206],
44-
[2.2267992609995417,0.2571639199995843,0.24316665399965132],
45-
[5.867168218999723,0.4684128890003194,0.4540498100031982],
46-
[4.697866193000664,0.9545511869982874,0.7677795919989876],
47-
[9.78189673399902,1.4304991219978547,1.0890957099982188],
48-
[9.900697042001411,1.1249232040026982,1.0547837240010267],
49-
[0.6744621270008793,0.30249466600071173,0.2852872900002694],
50-
[0.26121385999795166,0.16722514600041904,0.18514311000035377],
51-
[0.21776700200280175,0.11190589800025919,0.1330010960009531],
52-
[0.2297739960013132,0.11809357400125009,0.1213479809994169],
53-
[0.46232909700120217,0.28833456100255717,0.27430850900054793],
54-
[0.2237862940019113,0.1302182739964337,0.10985613000229932],
55-
[0.22377537700231187,0.10815454900148325,0.11889209400032996],
56-
[0.2163319399987813,0.11454355099704117,0.1296722500010219]
14+
[0.277,0.140,0.149],
15+
[0.129,0.072,0.068],
16+
[0.136,0.081,0.081],
17+
[0.329,0.080,0.065],
18+
[0.950,0.145,0.149],
19+
[0.829,0.200,0.199],
20+
[0.124,0.063,0.068],
21+
[0.149,0.092,0.078],
22+
[0.716,0.167,0.161],
23+
[1.077,0.188,0.177],
24+
[0.417,0.097,0.092],
25+
[0.807,0.103,0.100],
26+
[1.155,0.240,0.226],
27+
[2.313,0.407,0.340],
28+
[0.886,0.225,0.214],
29+
[0.492,0.172,0.186],
30+
[2.139,0.337,0.304],
31+
[2.103,0.282,0.281],
32+
[4.014,0.466,0.433],
33+
[0.177,0.070,0.060],
34+
[9.792,0.331,0.302],
35+
[10.963,0.216,0.191],
36+
[21.466,0.375,0.320],
37+
[48.898,1.545,1.231],
38+
[0.183,0.097,0.084],
39+
[0.890,0.110,0.094],
40+
[0.443,0.097,0.086],
41+
[9.775,0.274,0.228],
42+
[8.982,1.978,1.881],
43+
[0.157,0.092,0.078],
44+
[2.203,0.203,0.179],
45+
[5.725,0.218,0.214],
46+
[4.437,0.695,0.686],
47+
[9.814,0.960,0.713],
48+
[9.826,0.845,0.827],
49+
[0.313,0.170,0.167],
50+
[0.230,0.126,0.123],
51+
[0.192,0.107,0.101],
52+
[0.168,0.078,0.077],
53+
[0.349,0.214,0.213],
54+
[0.139,0.065,0.054],
55+
[0.130,0.078,0.091],
56+
[0.126,0.069,0.064]
5757
]
5858
}

duckdb-parquet/run.sh

+16-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,21 @@
11
#!/bin/bash
22

3+
TRIES=3
4+
35
cat queries.sql | while read -r query; do
46
sync
5-
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
7+
echo 3 | sudo tee /proc/sys/vm/drop_caches > /dev/null
68

7-
./query.py <<< "${query}"
8-
done
9+
echo "$query";
10+
cli_params=()
11+
cli_params+=("-c")
12+
cli_params+=("SET parquet_metadata_cache=true")
13+
cli_params+=("-c")
14+
cli_params+=(".timer on")
15+
for i in $(seq 1 $TRIES); do
16+
cli_params+=("-c")
17+
cli_params+=("${query}")
18+
done;
19+
echo "${cli_params[@]}"
20+
duckdb hits.db "${cli_params[@]}"
21+
done;

duckdb/benchmark.sh

+18-10
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,31 @@
11
#!/bin/bash
22

33
# Install
4-
54
sudo apt-get update
6-
sudo apt-get install -y python3-pip
7-
pip install --break-system-packages duckdb==1.1.3 psutil
5+
sudo apt-get install ninja-build cmake build-essential make ccache pip clang -y
86

9-
# Load the data
7+
export CC=clang
8+
export CXX=clang++
9+
git clone https://github.com/duckdb/duckdb
10+
cd duckdb
11+
git checkout v1.2-histrionicus
12+
GEN=ninja NATIVE_ARCH=1 LTO=thin make
13+
export PATH="$PATH:`pwd`/build/release/"
14+
cd ..
1015

11-
wget --no-verbose --continue 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz'
12-
gzip -d hits.csv.gz
16+
# Load the data
17+
wget --no-verbose --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz'
18+
gzip -d hits.tsv.gz
1319

14-
./load.py
20+
time duckdb hits.db -f create.sql -c "COPY hits FROM 'hits.tsv' (QUOTE '')"
1521

1622
# Run the queries
1723

1824
./run.sh 2>&1 | tee log.txt
1925

20-
wc -c my-db.duckdb
26+
wc -c hits.db
2127

22-
cat log.txt | grep -P '^\d|Killed|Segmentation' | sed -r -e 's/^.*(Killed|Segmentation).*$/null\nnull\nnull/' |
23-
awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
28+
cat log.txt |
29+
grep -P '^\d|Killed|Segmentation|^Run Time \(s\): real' |
30+
sed -r -e 's/^.*(Killed|Segmentation).*$/null\nnull\nnull/; s/^Run Time \(s\): real\s*([0-9.]+).*$/\1/' |
31+
awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'

duckdb/load.py

-22
This file was deleted.

0 commit comments

Comments
 (0)