Skip to content

Commit 933518c

Browse files
authored
Merge pull request #324 from JelteF/update-pg_duckdb-numbers-2
Update pg_duckdb numbers for v0.3.1
2 parents 315962a + 7b47dd3 commit 933518c

File tree

9 files changed

+330
-227
lines changed

9 files changed

+330
-227
lines changed

pg_duckdb-motherduck/benchmark.sh

+5 -7
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,16 @@
11
#!/bin/bash
22

3-
set -e
3+
set -ex
44

55
# Setup on Ubuntu (your package manager may vary):
66
# sudo snap install docker
7-
# sudo apt install postgresql-client-common
8-
# sudo apt install postgresql-client-16
7+
# sudo apt install postgresql-client
98

109
# Note: To get equivalent performance you should be running from
1110
# AWS US-EAST-1 region or as close to there as possible. Otherwise
1211
# you'll see additional latency.
1312

14-
# Sign up for MotherDuck.
13+
# Sign up for MotherDuck.
1514
# Go to the web ui and obtain a token
1615
# https://motherduck.com/docs/key-tasks/authenticating-and-connecting-to-motherduck/authenticating-to-motherduck/
1716
# Save the token as the MOTHERDUCK_TOKEN environment variable:
@@ -24,10 +23,10 @@ if [ -z "${MOTHERDUCK_TOKEN}" ]; then
2423
exit 1
2524
fi
2625

27-
sudo docker run -d --name pgduck --network=host -e POSTGRES_PASSWORD=duckdb -e MOTHERDUCK_TOKEN=${MOTHERDUCK_TOKEN} pgduckdb/pgduckdb:16-main -c duckdb.motherduck_enabled=true
26+
sudo docker run -d --name pgduck --network=host -e POSTGRES_PASSWORD=duckdb -e MOTHERDUCK_TOKEN=${MOTHERDUCK_TOKEN} pgduckdb/pgduckdb:17-v0.3.1 -c duckdb.motherduck_enabled=true
2827

2928
# Give postgres time to start running
30-
sleep 10
29+
sleep 10
3130

3231
./load.sh 2>&1 | tee load_log.txt
3332

@@ -39,4 +38,3 @@ sleep 10
3938

4039
cat log.txt | grep -oP 'Time: \d+\.\d+ ms' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/' |
4140
awk '{ if (i % 3 == 0) { printf "[" }; printf $1 / 1000; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
42-

pg_duckdb-motherduck/create.sql

+108 -4
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,108 @@
1-
create or replace view hits_view as
2-
select WatchID, JavaEnable, Title, GoodEvent, 'epoch'::timestamp + (EventTime || 'second')::interval EventTime, 'epoch'::timestamp + (EventDate || 'day')::interval EventDate, CounterID, ClientIP, RegionID, UserID, CounterClass, OS, UserAgent, URL, Referer, IsRefresh, RefererCategoryID, RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, UserAgentMinor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, WindowClientHeight, ClientTimeZone, 'epoch'::timestamp + (ClientEventTime || 'second')::interval ClientEventTime, SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce, FUniqID, OriginalURL, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, 'epoch'::timestamp + (LocalEventTime || 'second')::interval LocalEventTime, Age, Sex, Income, Interests, Robotness, RemoteIP, WindowName, OpenerName, HistoryLength, BrowserLanguage, BrowserCountry, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, ParamCurrency, ParamCurrencyID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, RefererHash, URLHash, CLID
3-
from read_parquet('REPLACE_PARQUET_FILE') as (WatchID BIGINT, JavaEnable SMALLINT, Title VARCHAR, GoodEvent SMALLINT, EventTime BIGINT, EventDate int, CounterID INTEGER, ClientIP INTEGER, RegionID INTEGER, UserID BIGINT, CounterClass SMALLINT, OS SMALLINT, UserAgent SMALLINT, URL VARCHAR, Referer VARCHAR, IsRefresh SMALLINT, RefererCategoryID SMALLINT, RefererRegionID INTEGER, URLCategoryID SMALLINT, URLRegionID INTEGER, ResolutionWidth SMALLINT, ResolutionHeight SMALLINT, ResolutionDepth SMALLINT, FlashMajor SMALLINT, FlashMinor SMALLINT, FlashMinor2 VARCHAR, NetMajor SMALLINT, NetMinor SMALLINT, UserAgentMajor SMALLINT, UserAgentMinor VARCHAR, CookieEnable SMALLINT, JavascriptEnable SMALLINT, IsMobile SMALLINT, MobilePhone SMALLINT, MobilePhoneModel VARCHAR, Params VARCHAR, IPNetworkID INTEGER, TraficSourceID SMALLINT, SearchEngineID SMALLINT, SearchPhrase VARCHAR, AdvEngineID SMALLINT, IsArtifical SMALLINT, WindowClientWidth SMALLINT, WindowClientHeight SMALLINT, ClientTimeZone SMALLINT, ClientEventTime BIGINT, SilverlightVersion1 SMALLINT, SilverlightVersion2 SMALLINT, SilverlightVersion3 INTEGER, SilverlightVersion4 SMALLINT, PageCharset VARCHAR, CodeVersion INTEGER, IsLink SMALLINT, IsDownload SMALLINT, IsNotBounce SMALLINT, FUniqID BIGINT, OriginalURL VARCHAR, HID INTEGER, IsOldCounter SMALLINT, IsEvent SMALLINT, IsParameter SMALLINT, DontCountHits SMALLINT, WithHash SMALLINT, HitColor VARCHAR, LocalEventTime BIGINT, Age SMALLINT, Sex SMALLINT, Income SMALLINT, Interests SMALLINT, Robotness SMALLINT, RemoteIP INTEGER, WindowName INTEGER, OpenerName INTEGER, HistoryLength SMALLINT, BrowserLanguage VARCHAR, BrowserCountry VARCHAR, SocialNetwork VARCHAR, SocialAction VARCHAR, HTTPError SMALLINT, SendTiming INTEGER, DNSTiming INTEGER, ConnectTiming INTEGER, ResponseStartTiming INTEGER, ResponseEndTiming INTEGER, FetchTiming INTEGER, SocialSourceNetworkID SMALLINT, SocialSourcePage VARCHAR, ParamPrice BIGINT, ParamOrderID VARCHAR, ParamCurrency VARCHAR, 
ParamCurrencyID SMALLINT, OpenstatServiceName VARCHAR, OpenstatCampaignID VARCHAR, OpenstatAdID VARCHAR, OpenstatSourceID VARCHAR, UTMSource VARCHAR, UTMMedium VARCHAR, UTMCampaign VARCHAR, UTMContent VARCHAR, UTMTerm VARCHAR, FromTag VARCHAR, HasGCLID SMALLINT, RefererHash BIGINT, URLHash BIGINT, CLID INTEGER);
4-
CREATE table REPLACE_SCHEMA.hits USING DUCKDB AS SELECT * FROM hits_view;
1+
CREATE TABLE REPLACE_SCHEMA.hits USING duckdb AS
2+
select
3+
r['WatchID'] AS WatchID,
4+
r['JavaEnable'] AS JavaEnable,
5+
r['Title']::text AS Title,
6+
r['GoodEvent'] AS GoodEvent,
7+
('epoch'::timestamp + (r['EventTime'] * interval '1 second'))::timestamp AS EventTime,
8+
(DATE '1970-01-01' + (r['EventDate'] * interval '1 day'))::date AS EventDate,
9+
r['CounterID'] AS CounterID,
10+
r['ClientIP'] AS ClientIP,
11+
r['RegionID'] AS RegionID,
12+
r['UserID'] AS UserID,
13+
r['CounterClass'] AS CounterClass,
14+
r['OS'] AS OS,
15+
r['UserAgent'] AS UserAgent,
16+
r['URL']::text AS URL,
17+
r['Referer']::text AS Referer,
18+
r['IsRefresh'] AS IsRefresh,
19+
r['RefererCategoryID'] AS RefererCategoryID,
20+
r['RefererRegionID'] AS RefererRegionID,
21+
r['URLCategoryID'] AS URLCategoryID,
22+
r['URLRegionID'] AS URLRegionID,
23+
r['ResolutionWidth'] AS ResolutionWidth,
24+
r['ResolutionHeight'] AS ResolutionHeight,
25+
r['ResolutionDepth'] AS ResolutionDepth,
26+
r['FlashMajor'] AS FlashMajor,
27+
r['FlashMinor'] AS FlashMinor,
28+
r['FlashMinor2'] AS FlashMinor2,
29+
r['NetMajor'] AS NetMajor,
30+
r['NetMinor'] AS NetMinor,
31+
r['UserAgentMajor'] AS UserAgentMajor,
32+
r['UserAgentMinor'] AS UserAgentMinor,
33+
r['CookieEnable'] AS CookieEnable,
34+
r['JavascriptEnable'] AS JavascriptEnable,
35+
r['IsMobile'] AS IsMobile,
36+
r['MobilePhone'] AS MobilePhone,
37+
r['MobilePhoneModel'] AS MobilePhoneModel,
38+
r['Params'] AS Params,
39+
r['IPNetworkID'] AS IPNetworkID,
40+
r['TraficSourceID'] AS TraficSourceID,
41+
r['SearchEngineID'] AS SearchEngineID,
42+
r['SearchPhrase'] AS SearchPhrase,
43+
r['AdvEngineID'] AS AdvEngineID,
44+
r['IsArtifical'] AS IsArtifical,
45+
r['WindowClientWidth'] AS WindowClientWidth,
46+
r['WindowClientHeight'] AS WindowClientHeight,
47+
r['ClientTimeZone'] AS ClientTimeZone,
48+
('epoch'::timestamp + (r['ClientEventTime'] * interval '1 second'))::timestamp AS ClientEventTime,
49+
r['SilverlightVersion1'] AS SilverlightVersion1,
50+
r['SilverlightVersion2'] AS SilverlightVersion2,
51+
r['SilverlightVersion3'] AS SilverlightVersion3,
52+
r['SilverlightVersion4'] AS SilverlightVersion4,
53+
r['PageCharset'] AS PageCharset,
54+
r['CodeVersion'] AS CodeVersion,
55+
r['IsLink'] AS IsLink,
56+
r['IsDownload'] AS IsDownload,
57+
r['IsNotBounce'] AS IsNotBounce,
58+
r['FUniqID'] AS FUniqID,
59+
r['OriginalURL'] AS OriginalURL,
60+
r['HID'] AS HID,
61+
r['IsOldCounter'] AS IsOldCounter,
62+
r['IsEvent'] AS IsEvent,
63+
r['IsParameter'] AS IsParameter,
64+
r['DontCountHits'] AS DontCountHits,
65+
r['WithHash'] AS WithHash,
66+
r['HitColor'] AS HitColor,
67+
('epoch'::timestamp + (r['LocalEventTime'] * interval '1 second'))::timestamp AS LocalEventTime,
68+
r['Age'] AS Age,
69+
r['Sex'] AS Sex,
70+
r['Income'] AS Income,
71+
r['Interests'] AS Interests,
72+
r['Robotness'] AS Robotness,
73+
r['RemoteIP'] AS RemoteIP,
74+
r['WindowName'] AS WindowName,
75+
r['OpenerName'] AS OpenerName,
76+
r['HistoryLength'] AS HistoryLength,
77+
r['BrowserLanguage'] AS BrowserLanguage,
78+
r['BrowserCountry'] AS BrowserCountry,
79+
r['SocialNetwork'] AS SocialNetwork,
80+
r['SocialAction'] AS SocialAction,
81+
r['HTTPError'] AS HTTPError,
82+
r['SendTiming'] AS SendTiming,
83+
r['DNSTiming'] AS DNSTiming,
84+
r['ConnectTiming'] AS ConnectTiming,
85+
r['ResponseStartTiming'] AS ResponseStartTiming,
86+
r['ResponseEndTiming'] AS ResponseEndTiming,
87+
r['FetchTiming'] AS FetchTiming,
88+
r['SocialSourceNetworkID'] AS SocialSourceNetworkID,
89+
r['SocialSourcePage'] AS SocialSourcePage,
90+
r['ParamPrice'] AS ParamPrice,
91+
r['ParamOrderID'] AS ParamOrderID,
92+
r['ParamCurrency'] AS ParamCurrency,
93+
r['ParamCurrencyID'] AS ParamCurrencyID,
94+
r['OpenstatServiceName'] AS OpenstatServiceName,
95+
r['OpenstatCampaignID'] AS OpenstatCampaignID,
96+
r['OpenstatAdID'] AS OpenstatAdID,
97+
r['OpenstatSourceID'] AS OpenstatSourceID,
98+
r['UTMSource'] AS UTMSource,
99+
r['UTMMedium'] AS UTMMedium,
100+
r['UTMCampaign'] AS UTMCampaign,
101+
r['UTMContent'] AS UTMContent,
102+
r['UTMTerm'] AS UTMTerm,
103+
r['FromTag'] AS FromTag,
104+
r['HasGCLID'] AS HasGCLID,
105+
r['RefererHash'] AS RefererHash,
106+
r['URLHash'] AS URLHash,
107+
r['CLID'] AS CLID
108+
from read_parquet('REPLACE_PARQUET_FILE', binary_as_string => true) r;

pg_duckdb-motherduck/load.sh

+7 -7
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,15 @@
11
#!/bin/bash
22

3+
set -ex
4+
35
CONNECTION=postgres://postgres:duckdb@localhost:5432/postgres
4-
PSQL=psql
56

67
DATABASE='ddb$pgclick'
7-
PARQUET_FILE='https:\/\/datasets.clickhouse.com\/hits_compatible\/hits.parquet'
8+
PARQUET_FILE='https://datasets.clickhouse.com/hits_compatible/hits.parquet'
89

910
echo "Loading data"
1011
(
11-
echo "\timing"
12-
cat create.sql |
13-
sed -e "s/REPLACE_SCHEMA/$DATABASE/g" -e "s/REPLACE_PARQUET_FILE/$PARQUET_FILE/g"
14-
) | $PSQL $CONNECTION | grep 'Time'
15-
12+
echo "\timing"
13+
cat create.sql |
14+
sed -e "s=REPLACE_SCHEMA=$DATABASE=g" -e "s=REPLACE_PARQUET_FILE=$PARQUET_FILE=g"
15+
) | psql --no-psqlrc --tuples-only $CONNECTION | grep 'Time'
+47 -47
Original file line number | Diff line number | Diff line change
@@ -1,57 +1,57 @@
11
{
22
"system": "pg_duckdb (MotherDuck enabled)",
3-
"date": "2024-11-23",
4-
"machine": "cloud",
3+
"date": "2024-03-07",
4+
"machine": "Jumbo",
55
"cluster_size": "serverless",
66

77
"tags": ["managed", "column-oriented", "PostgreSQL compatible", "serverless"],
88

9-
"load_time": 91.614,
10-
"data_size": 26843545600,
9+
"load_time": 118.616,
10+
"data_size": 26306674688,
1111

1212
"result": [
13-
[0.138964,0.015258,0.016077],
14-
[0.132879,0.014631,0.016375],
15-
[0.150402,0.020076,0.018707],
16-
[0.136571,0.024532,0.024149],
17-
[0.269683,0.148919,0.15068],
18-
[0.320305,0.181124,0.178617],
19-
[0.130833,0.014634,0.014709],
20-
[0.130438,0.019109,0.019483],
21-
[0.307982,0.182397,0.183229],
22-
[0.360765,0.240961,0.24059],
23-
[0.181567,0.06479,0.064691],
24-
[0.208976,0.072179,0.072965],
25-
[0.302809,0.170854,0.170643],
26-
[0.437126,0.324878,0.318962],
27-
[0.296762,0.180741,0.182182],
28-
[0.322796,0.189612,0.189133],
29-
[0.488604,0.375323,0.371993],
30-
[0.443552,0.329381,0.357173],
31-
[0.762512,0.658258,0.641012],
32-
[0.140095,0.017002,0.017396],
33-
[0.765095,0.302509,0.2995],
34-
[0.349961,0.228921,0.224052],
35-
[0.472621,0.360402,0.356845],
36-
[1.39679,1.24423,1.21561],
37-
[0.18372,0.066296,0.066534],
38-
[0.177599,0.066766,0.06741],
39-
[0.205539,0.083399,0.082905],
40-
[0.470257,0.365256,0.354026],
41-
[1.5107,1.53563,1.55076],
42-
[0.849038,0.72794,0.728566],
43-
[0.304129,0.153122,0.152549],
44-
[0.321213,0.200797,0.198382],
45-
[0.877334,0.761022,0.761743],
46-
[0.937143,0.801845,0.806956],
47-
[0.935478,0.832431,0.811753],
48-
[0.382849,0.250345,0.267334],
49-
[0.168527,0.049099,0.048662],
50-
[0.142103,0.0414,0.025252],
51-
[0.143525,0.032741,0.033455],
52-
[0.234511,0.097073,0.095511],
53-
[0.143504,0.017825,0.018593],
54-
[0.143221,0.016469,0.016562],
55-
[0.14678,0.018243,0.018707]
13+
[0.170077,0.020405,0.018909],
14+
[0.168677,0.012225,0.012099],
15+
[0.189802,0.023631,0.027872],
16+
[0.186599,0.0263,0.026218],
17+
[0.317828,0.169011,0.165866],
18+
[0.394605,0.205204,0.199935],
19+
[0.165392,0.01788,0.019311],
20+
[0.144629,0.018512,0.018592],
21+
[0.331199,0.19267,0.193772],
22+
[0.394099,0.263567,0.262257],
23+
[0.212391,0.059484,0.064996],
24+
[0.198988,0.06339,0.063119],
25+
[0.313906,0.183646,0.192615],
26+
[0.483758,0.350229,0.343033],
27+
[0.32302,0.190289,0.187028],
28+
[0.315957,0.174511,0.174815],
29+
[0.551715,0.417263,0.417878],
30+
[0.501638,0.385333,0.381008],
31+
[0.926175,0.728146,0.715856],
32+
[0.122588,0.009631,0.009645],
33+
[0.880683,0.324568,0.324565],
34+
[0.336414,0.187099,0.187246],
35+
[0.757718,0.27547,0.275527],
36+
[2.57329,1.19426,1.21685],
37+
[0.271017,0.100878,0.096585],
38+
[0.189046,0.069643,0.062972],
39+
[0.176091,0.069487,0.06979],
40+
[0.538689,0.390095,0.390029],
41+
[4.19605,4.04684,4.04014],
42+
[0.213735,0.085933,0.04037],
43+
[0.307626,0.177809,0.15867],
44+
[0.336156,0.197062,0.19352],
45+
[1.68052,1.76627,1.34655],
46+
[1.91314,1.05903,1.27396],
47+
[1.29045,1.15402,1.24263],
48+
[0.421277,0.2593,0.259694],
49+
[0.195714,0.044441,0.043828],
50+
[0.156812,0.019686,0.020041],
51+
[0.1426,0.023138,0.020315],
52+
[0.250354,0.079622,0.080513],
53+
[0.147665,0.015038,0.01456],
54+
[0.150182,0.015326,0.015074],
55+
[0.147646,0.01833,0.018027]
5656
]
5757
}

pg_duckdb-motherduck/run.sh

+3 -3
Original file line number | Diff line number | Diff line change
@@ -5,11 +5,11 @@ CONNECTION=postgres://postgres:duckdb@localhost:5432/postgres
55

66
DATABASE='ddb$pgclick'
77

8-
cat queries.sql | while read query; do
8+
cat queries.sql | while read -r query; do
99
echo "$query"
1010
(
11-
echo "set search_path=$DATABASE;"
11+
echo "set search_path=$DATABASE;"
1212
echo '\timing'
1313
yes "$query" | head -n $TRIES
14-
) | psql $CONNECTION | grep 'Time'
14+
) | psql --no-psqlrc --tuples-only $CONNECTION | grep 'Time'
1515
done

pg_duckdb_parquet/benchmark.sh

+1 -1
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ set -ex
77
#sudo apt-get install -y postgresql-client
88

99
wget --no-verbose --continue https://datasets.clickhouse.com/hits_compatible/athena/hits.parquet
10-
sudo docker run -d --name pgduck -p 5432:5432 -e POSTGRES_PASSWORD=duckdb -v ./hits.parquet:/tmp/hits.parquet pgduckdb/pgduckdb:16-main
10+
sudo docker run -d --name pgduck -p 5432:5432 -e POSTGRES_PASSWORD=duckdb -v ./hits.parquet:/tmp/hits.parquet pgduckdb/pgduckdb:17-v0.3.1 -c duckdb.max_memory=10GB
1111

1212
sleep 5
1313
psql postgres://postgres:duckdb@localhost:5432/postgres -f create.sql

0 commit comments

Comments
 (0)