Skip to content

Commit b58abd1

Browse files
authored
Merge pull request #303 from adam-szymanski/main
2 parents 7bf3933 + d1ffa19 commit b58abd1

File tree

4 files changed

+61
-65
lines changed

4 files changed

+61
-65
lines changed

oxla/benchmark.sh

+6-10
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
#!/bin/bash -e
22

33
# docker
4-
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
5-
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
6-
sudo apt update
7-
sudo apt install -y docker-ce
4+
# Install Docker from the distribution package. -y answers apt's confirmation
# prompt so the benchmark can run unattended (matches the -y used below).
sudo apt install -y docker.io
85

96
# base
107
sudo apt-get install -y postgresql-client curl wget apt-transport-https ca-certificates software-properties-common gnupg2 parallel
@@ -15,26 +12,25 @@ echo "Download dataset."
1512
wget --no-verbose --continue 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz'
1613
echo "Unpack dataset."
1714
gzip -d hits.csv.gz
18-
chmod 777 ~ hits.csv
1915
mkdir data
20-
mv hits.csv ~/data
16+
mv hits.csv data
2117

2218
# get and configure Oxla image
2319
echo "Install and run Oxla."
2420

25-
sudo docker run --rm -p 5432:5432 -v ~/data:/data --name oxlacontainer public.ecr.aws/oxla/release:1.20.0-beta > /dev/null 2>&1 &
21+
# Start Oxla in the background with the dataset directory bind-mounted at /data.
# The host side must be an absolute path: a bare `data` source would be taken by
# Docker as a named volume, and the container would not see ./data/hits.csv.
sudo docker run --rm -p 5432:5432 -v "$(pwd)/data":/data --name oxlacontainer public.ecr.aws/oxla/release:1.53.0-beta > /dev/null 2>&1 &
2622
sleep 30 # waiting for container start and db initialisation (leader election, etc.)
2723

2824
# create table and ingest data
2925
export PGCLIENTENCODING=UTF8
3026

31-
psql -h localhost -t < create.sql
27+
PGPASSWORD=oxla psql -h localhost -U oxla -t < create.sql
3228
echo "Insert data."
33-
psql -h localhost -t -c '\timing' -c "COPY hits FROM '/data/hits.csv';"
29+
PGPASSWORD=oxla psql -h localhost -U oxla -t -c '\timing' -c "COPY hits FROM '/data/hits.csv';"
3430

3531
# get ingested data size
3632
echo "data size after ingest:"
37-
psql -h localhost -t -c '\timing' -c "SELECT pg_total_relation_size('hits');"
33+
PGPASSWORD=oxla psql -h localhost -U oxla -t -c '\timing' -c "SELECT pg_total_relation_size('hits');"
3834

3935
# wait for merges to finish
4036
sleep 60

oxla/queries.sql

+10-10
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,31 @@ SELECT COUNT(*) FROM hits;
22
SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0;
33
SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits;
44
SELECT AVG(UserID) FROM hits;
5-
SELECT COUNT(*) FROM (SELECT UserId FROM hits GROUP BY UserId);
6-
SELECT COUNT(*) FROM (SELECT SearchPhrase FROM hits GROUP BY SearchPhrase);
5+
SELECT COUNT(DISTINCT UserID) FROM hits;
6+
SELECT COUNT(DISTINCT SearchPhrase) FROM hits;
77
SELECT MIN(EventDate), MAX(EventDate) FROM hits;
88
SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC;
9-
SELECT RegionID, COUNT(*) AS u FROM (SELECT RegionID, UserID FROM hits GROUP BY RegionID, UserID) GROUP BY RegionID ORDER BY u DESC LIMIT 10;
10-
SELECT RegionID, SUM(AdvEngineIDSum), SUM(c) AS c, SUM(ResolutionWidthSum) / SUM(c), COUNT(*) FROM (SELECT SUM(AdvEngineID) AS AdvEngineIDSum, SUM(ResolutionWidth) AS ResolutionWidthSum, COUNT(*) AS c, RegionId, UserID FROM hits GROUP BY RegionID, UserID) GROUP BY RegionID ORDER BY c DESC LIMIT 10;
11-
SELECT MobilePhoneModel, COUNT(*) AS u FROM (SELECT MobilePhoneModel, UserID FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel, UserID) GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
12-
SELECT MobilePhoneModel, MobilePhone, COUNT(*) AS u FROM (SELECT MobilePhoneModel, MobilePhone, UserID FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel, MobilePhone, UserID) GROUP BY MobilePhoneModel, MobilePhone ORDER BY u DESC LIMIT 10;
9+
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
10+
SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
11+
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
12+
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
1313
SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
14-
SELECT SearchPhrase, COUNT(*) AS u FROM (SELECT SearchPhrase, UserID FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase, UserID) GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
14+
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
1515
SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
1616
SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10;
1717
SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
1818
SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;
1919
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
2020
SELECT UserID FROM hits WHERE UserID = 435090932899640449;
2121
SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%';
22-
SELECT NULL;
23-
SELECT NULL;
22+
SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
23+
SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
2424
SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10;
2525
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10;
2626
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10;
2727
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10;
2828
SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
29-
SELECT NULL;
29+
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
3030
SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), 
SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits;
3131
SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
3232
SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;

oxla/results/c6a.4xlarge.json

+44-44
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"system": "Oxla",
3-
"date": "2024-04-09",
3+
"date": "2025-01-23",
44
"machine": "c6a.4xlarge, 500gb gp2",
55
"cluster_size": 1,
66
"comment": "Ingests data only from non-compressed csv.",
@@ -11,48 +11,48 @@
1111
"data_size": 17394972923,
1212

1313
"result": [
14-
[3.112892,0.068225,0.049215],
15-
[1.476993,0.068502,0.01352],
16-
[1.532504,0.015794,0.01899],
17-
[1.541791,0.043208,0.090244],
18-
[1.424205,1.114138,1.079011],
19-
[1.546764,1.340306,1.339957],
20-
[0.202873,0.009787,0.008214],
21-
[1.017122,0.01247,0.010861],
22-
[1.790766,1.816432,1.681662],
23-
[2.06058,2.051205,2.05747],
24-
[0.166164,0.149605,0.147586],
25-
[0.33821,0.15334,0.15212],
26-
[0.968408,0.975795,0.932127],
27-
[1.641231,1.648973,1.691530],
28-
[1.039926,1.021776,1.015062],
29-
[1.059569,1.038191,1.016849],
30-
[2.930077,2.780725,2.786122],
31-
[2.7766,2.745188,2.827054],
32-
[5.474963,5.455883,5.462812],
33-
[0.069049,0.037876,0.030425],
34-
[5.294758,2.818725,2.803313],
35-
[null,null,null],
36-
[null,null,null],
37-
[21.034479,18.253271,6.146486],
38-
[0.17394,0.151798,0.146398],
39-
[0.180155,0.170271,0.177003],
40-
[0.22494,0.216158,0.216051],
41-
[0.978861,0.973059,0.964485],
42-
[null,null,null],
43-
[0.030928,0.02037,0.020366],
44-
[0.408601,0.412485,0.408602],
45-
[0.875709,0.743332,0.704842],
46-
[7.962516,7.867736,7.594272],
47-
[6.209667,5.892066,5.963681],
48-
[5.931634,5.947336,6.005506],
49-
[0.577314,0.583573,0.545736],
50-
[0.126127,0.090768,0.094307],
51-
[0.110712,0.04149,0.039939],
52-
[0.060824,0.043637,0.030213],
53-
[0.322545,0.204934,0.185178],
54-
[0.121207,0.011082,0.011699],
55-
[0.069138,0.012728,0.014108],
56-
[0.030538,0.028048,0.030625]
14+
[0.046851,0.02652,0.02648],
15+
[0.129717,0.010132,0.010436],
16+
[0.024405,0.017653,0.015578],
17+
[1.038988,0.026278,0.025259],
18+
[0.920077,0.867753,0.858069],
19+
[1.193297,1.094777,1.088436],
20+
[0.024024,0.015385,0.014366],
21+
[0.021601,0.015406,0.013076],
22+
[1.523167,1.323168,1.30051],
23+
[1.906564,1.755886,1.844895],
24+
[0.190242,0.1447,0.136949],
25+
[0.221494,0.159692,0.160614],
26+
[0.836624,0.8148,0.813433],
27+
[1.664004,1.635831,1.621079],
28+
[0.887504,0.885837,0.875387],
29+
[1.070728,0.938593,1.021919],
30+
[2.685652,2.644121,2.647854],
31+
[2.667017,2.658392,2.628829],
32+
[4.65067,4.626599,4.701596],
33+
[0.105851,0.080334,0.05596],
34+
[0.722496,0.705024,0.738724],
35+
[0.765263,0.767057,0.739108],
36+
[1.877141,1.390639,1.434695],
37+
[16.211858,3.861157,3.897472],
38+
[0.112136,0.099527,0.097523],
39+
[0.121357,0.111725,0.112716],
40+
[0.157924,0.150043,0.148599],
41+
[0.698874,0.674169,0.685363],
42+
[51.293703,51.584485,51.114784],
43+
[0.114097,0.099482,0.102921],
44+
[0.437985,0.415509,0.408477],
45+
[0.720325,0.674251,0.681977],
46+
[89.73327,81.154516,76.413726],
47+
[7.202697,6.053296,6.002008],
48+
[5.97555,5.824706,6.135259],
49+
[0.68644,0.655987,0.6812],
50+
[0.064488,0.06473,0.062154],
51+
[0.023134,0.029644,0.023701],
52+
[0.039028,0.05021,0.04079],
53+
[0.152121,0.144167,0.132704],
54+
[0.01652,0.012679,0.009873],
55+
[0.018828,0.030098,0.021353],
56+
[0.017781,0.017341,0.015782]
5757
]
5858
}

oxla/run.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ cat queries.sql | while read -r query; do
1313
else
1414
results+="["
1515
for i in $(seq 1 $TRIES); do
16-
time=$(psql -h localhost -t -c '\timing' -c "$query" | grep 'Time' | perl -nle 'm/Time: ([^ ]*) ms/; print $1 / 1000')
16+
time=$(PGPASSWORD=oxla psql -h localhost -U oxla -t -c '\timing' -c "$query" | grep 'Time' | perl -nle 'm/Time: ([^ ]*) ms/; print $1 / 1000')
1717
echo "$time s"
1818
results+="$time,"
1919
done

0 commit comments

Comments
 (0)