Skip to content

Commit

Permalink
Merge pull request #48 from datasets/action-fail-fix
Browse files Browse the repository at this point in the history
[fix][s] Fixing github action fail - refs #45
  • Loading branch information
sabas authored Feb 6, 2025
2 parents d573490 + dd50fab commit 3c8cad8
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 11 deletions.
15 changes: 5 additions & 10 deletions scripts/download_loc.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
import io
import csv
import chardet
import zipfile
import requests
import pandas as pd
import cloudscraper

from bs4 import BeautifulSoup
from collections import defaultdict

# Module-level cloudscraper session used for the HTTP GET requests in
# get_zip_source_path() and download_zip().
scraper = cloudscraper.create_scraper()

# Page that process() passes to get_zip_source_path() to collect the
# download links.
source_path = "https://unece.org/trade/cefact/UNLOCODE-Download"

def get_zip_source_path(source_path):
response = requests.get(source_path)
response = scraper.get(source_path)
soup = BeautifulSoup(response.text, 'html.parser')
links = []
for link in soup.find_all('a', href=True):
Expand All @@ -24,7 +20,7 @@ def get_zip_source_path(source_path):


def download_zip(url):
    """Download *url* and save the response body in the working directory.

    The local file name is the last ``/``-separated segment of the URL.

    Args:
        url: Address of the file to fetch.
    """
    # Fetch once through the module-level cloudscraper session.
    r = scraper.get(url)
    file_name = url.split('/')[-1]
    # The context manager flushes and closes the handle even if the
    # write raises, instead of leaving it to garbage collection.
    with open(file_name, 'wb') as f:
        f.write(r.content)

Expand All @@ -33,7 +29,6 @@ def process():
print("Downloading UNLOCODE files")
zip_source_path = get_zip_source_path(source_path)


for elem in zip_source_path:
download_zip(elem)
print(f"Downloaded {elem}")
Expand Down
3 changes: 2 additions & 1 deletion scripts/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ titlecase==2.4.1
pandas==2.2.3
requests==2.32.3
chardet==5.2.0
beautifulsoup4==4.12.3
beautifulsoup4==4.12.3
cloudscraper==1.2.71

0 comments on commit 3c8cad8

Please sign in to comment.