Skip to content

Commit e869b38

Browse files
committed Nov 9, 2024
Add scripts to import geolite2 data
1 parent 97ef906 commit e869b38

8 files changed

+138
-0
lines changed
 

‎bin/util/format_geolite2_cities_csv

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/usr/bin/env ruby
2+
# frozen_string_literal: true
3+
require 'csv'
4+
require 'ipaddr'
5+
6+
# Builds the output fields for one row of a GeoLite2 City "Blocks" CSV.
#
# @param row [#[]] a CSV row (or Hash) exposing 'network' and 'geoname_id'
# @return [Array, nil] [first address, last address, geoname_id] for the row's network,
#   or nil when the row has no geoname_id and must be skipped
# @raise [IPAddr::Error] if 'network' cannot be parsed as an address or CIDR block
def city_block_fields(row)
  geoname_id = row['geoname_id']
  # Decide whether to skip before parsing, so dropped rows cost no IPAddr work.
  return if geoname_id.nil?

  range = IPAddr.new(row['network']).to_range
  [range.begin, range.end, geoname_id]
end

# Each command-line argument is read as a headered CSV file; one CSV line per kept row
# is written to stdout.
ARGV.each do |file|
  # CSV.foreach closes the file once iteration finishes. CSV.open without a block left
  # the handle open for the rest of the process.
  CSV.foreach(file, headers: true) do |row|
    fields = city_block_fields(row)
    puts CSV.generate_line(fields) if fields
  end
end

‎bin/util/format_geolite2_ipasn_csv

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/usr/bin/env ruby
2+
# frozen_string_literal: true
3+
require 'csv'
4+
require 'ipaddr'
5+
6+
# Builds the output fields for one row of a GeoLite2 ASN "Blocks" CSV.
#
# @param row [#[]] a CSV row (or Hash) exposing 'network', 'autonomous_system_number'
#   and 'autonomous_system_organization'
# @return [Array] [first address, last address, AS number, AS organization]
# @raise [IPAddr::Error] if 'network' cannot be parsed as an address or CIDR block
def asn_block_fields(row)
  range = IPAddr.new(row['network']).to_range
  [
    range.begin,
    range.end,
    row['autonomous_system_number'],
    row['autonomous_system_organization']
  ]
end

# Each command-line argument is read as a headered CSV file; one CSV line per row is
# written to stdout.
ARGV.each do |file|
  # CSV.foreach closes the file once iteration finishes. CSV.open without a block left
  # the handle open for the rest of the process.
  CSV.foreach(file, headers: true) do |row|
    puts CSV.generate_line(asn_block_fields(row))
  end
end

‎bin/util/format_geolite2_locations_csv

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#!/usr/bin/env ruby
2+
# frozen_string_literal: true
3+
require 'csv'
4+
5+
# Input columns copied to the output, in output order.
LOCATION_COLUMNS = %w[
  geoname_id
  continent_code
  continent_name
  country_iso_code
  country_name
  subdivision_1_iso_code
  subdivision_1_name
  subdivision_2_iso_code
  subdivision_2_name
  city_name
].freeze

# Picks the exported columns out of one row of a locations CSV.
#
# @param row [#[]] a CSV row (or Hash) keyed by column name
# @return [Array] the values of LOCATION_COLUMNS, in that order; a column that is absent
#   or empty in the row comes back as nil
def location_fields(row)
  LOCATION_COLUMNS.map { |column| row[column] }
end

# Each command-line argument is read as a headered CSV file; one CSV line per row is
# written to stdout.
ARGV.each do |file|
  # CSV.foreach closes the file once iteration finishes. CSV.open without a block left
  # the handle open for the rest of the process.
  CSV.foreach(file, headers: true) do |row|
    puts CSV.generate_line(location_fields(row))
  end
end

‎bin/util/format_import_csv_to_sql

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#!/usr/bin/env ruby
2+
# frozen_string_literal: true
3+
require 'securerandom'
4+
5+
# This script takes a CSV file on STDIN and outputs a SQL script that will import the CSV into a new
# table with the same schema as the specified base table, then safely hot-swap the new table in for
# the base table before deleting it. This allows us to import a whole dataset in one go without
# locking the table for reads.
#
# Usage: format_import_csv_to_sql BASE_TABLE < data.csv
#
# The SQL statements are echoed to stderr as well as stdout so the operator can see what is
# being run; the CSV payload itself goes to stdout only.

base_table = ARGV[0]

# The table name is interpolated into SQL unquoted, so only a plain, unqualified identifier is
# accepted. Without this check a missing argument produced statements against a table literally
# named "_<random>", and any other text would have been emitted as SQL verbatim.
unless base_table&.match?(/\A[[:alpha:]_][[:alnum:]_$]*\z/)
  abort "usage: #{File.basename($PROGRAM_NAME)} BASE_TABLE < data.csv"
end

# Random suffix so the staging table cannot collide with an existing table.
new_table = "#{base_table}_#{SecureRandom.alphanumeric(5)}"
old_table = "#{new_table}_old"

# Writes +str+ as a line to both stdout (the generated script) and stderr (operator log).
def tee(str)
  $stdout.puts str
  $stderr.puts str
end

tee 'BEGIN;'

tee "CREATE TABLE #{new_table} (LIKE #{base_table} INCLUDING ALL);"
# The staging table is left unqualified here, like every other statement in the script, so it
# is resolved through the same search_path that CREATE TABLE used. The previous hard-coded
# "public." prefix only worked when the table happened to be created in the public schema.
tee "COPY #{new_table} FROM stdin WITH (FORMAT csv, FREEZE true);"

# Stream the CSV through line by line; puts supplies a trailing newline if the final input line
# lacks one, so the end-of-data marker below always starts on its own line.
$stdin.each_line { |line| puts line }

# End-of-data marker for COPY ... FROM stdin.
puts '\\.'

tee "ALTER TABLE #{base_table} RENAME TO #{old_table};"
tee "ALTER TABLE #{new_table} RENAME TO #{base_table};"
# NOTE(review): CASCADE also drops objects that depend on the old table (views, foreign keys,
# owned sequences) — confirm that is intended for every table this is used on.
tee "DROP TABLE #{old_table} CASCADE;"

tee 'COMMIT;'

‎bin/util/import_geolite2

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/usr/bin/env bash
2+
3+
set -e

cd "$(dirname "$0")"

# Credentials handed to curl's -u option; refuse to start without them instead of
# issuing unauthenticated download requests.
auth="${1:?usage: import_geolite2 USER:PASSWORD}"

# All downloads and unpacked archives live under one scratch directory that is removed
# when the script exits, whether it succeeded or failed.
workdir="$(mktemp -d)"
trap 'rm -rf "$workdir"' EXIT

# Downloads and unpacks the GeoLite2 CSV edition named by $1 (City, ASN, ...).
# curl/unzip chatter is sent to stderr; the only thing written to stdout is the path of
# the unpacked GeoLite2-$1-CSV_*/ directory, so callers can capture it with $(...).
function download_geolite2() {
  # Bash clears errexit inside command substitutions (outside POSIX mode), so it is
  # re-enabled here to make a failed download or unzip abort the function.
  set -e
  local archive
  local destdir="$workdir/$1"
  # mktemp creates the file atomically; `mktemp -u` only printed an unused name.
  archive="$(mktemp "$workdir/download.XXXXXX")"
  mkdir "$destdir"
  curl -o "$archive" -J -L -u "$auth" --fail-with-body \
    "https://download.maxmind.com/geoip/databases/GeoLite2-$1-CSV/download?suffix=zip" >&2
  unzip "$archive" -d "$destdir" >&2
  rm "$archive"
  echo "$destdir"/GeoLite2-"$1"-CSV_*/
}

citydir=$(download_geolite2 City)
asndir=$(download_geolite2 ASN)

./import_geolite2_cities "$citydir"/GeoLite2-City-Blocks-IPv{4,6}.csv
./import_geolite2_locations "$citydir"/GeoLite2-City-Locations-en.csv
./import_geolite2_ipasn "$asndir"/GeoLite2-ASN-Blocks-IPv{4,6}.csv

‎bin/util/import_geolite2_cities

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/usr/bin/env bash
2+
3+
# Stop on the first failure, including a failure of any stage of a pipeline.
set -eo pipefail

cd "$(dirname "$0")"

# Format into a scratch file before generating any SQL. format_import_csv_to_sql emits
# the table swap and COMMIT as soon as its input ends, so piping the formatter straight
# into it would commit a truncated (or empty) table if the formatter died part-way.
csv="$(mktemp)"
trap 'rm -f "$csv"' EXIT

./format_geolite2_cities_csv "$@" > "$csv"

./format_import_csv_to_sql ip_cities < "$csv" | (
  cd ../../ && ./bin/rails dbconsole
)

‎bin/util/import_geolite2_ipasn

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/usr/bin/env bash
2+
3+
# Stop on the first failure, including a failure of any stage of a pipeline.
set -eo pipefail

cd "$(dirname "$0")"

# Format into a scratch file before generating any SQL. format_import_csv_to_sql emits
# the table swap and COMMIT as soon as its input ends, so piping the formatter straight
# into it would commit a truncated (or empty) table if the formatter died part-way.
csv="$(mktemp)"
trap 'rm -f "$csv"' EXIT

./format_geolite2_ipasn_csv "$@" > "$csv"

./format_import_csv_to_sql ip_asns < "$csv" | (
  cd ../../ && ./bin/rails dbconsole
)

‎bin/util/import_geolite2_locations

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/usr/bin/env bash
2+
3+
# Stop on the first failure, including a failure of any stage of a pipeline.
set -eo pipefail

cd "$(dirname "$0")"

# Format into a scratch file before generating any SQL. format_import_csv_to_sql emits
# the table swap and COMMIT as soon as its input ends, so piping the formatter straight
# into it would commit a truncated (or empty) table if the formatter died part-way.
csv="$(mktemp)"
trap 'rm -f "$csv"' EXIT

./format_geolite2_locations_csv "$@" > "$csv"

./format_import_csv_to_sql geonames < "$csv" | (
  cd ../../ && ./bin/rails dbconsole
)

0 commit comments

Comments
 (0)
Please sign in to comment.