Skip to content

Commit 70370d7

Browse files
authored
Merge pull request #1215 from resource-watch/cit_002
update to match new file structure from source
2 parents e647587 + 601fafe commit 70370d7

File tree

4 files changed

+18
-11
lines changed

4 files changed

+18
-11
lines changed

bio_007_world_database_on_protected_areas/Dockerfile

+2-1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
FROM python:3.9
1+
FROM python:3.10
22
LABEL maintainer="Weiqi Zhou <[email protected]>"
33
#Note this script was originally developed by Yujing Wu <[email protected]>
44

@@ -14,6 +14,7 @@ RUN pip install numpy
1414
RUN pip install pandas
1515
RUN pip install python-rapidjson
1616
RUN pip install geopandas==1.0.1
17+
RUN pip install fiona
1718

1819
# set name
1920
ARG NAME=nrt-script

bio_007_world_database_on_protected_areas/contents/src/__init__.py

+13-7
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
import glob
1616
import warnings
1717
import json
18+
import fiona
1819
warnings.simplefilter(action='ignore', category=UserWarning)
1920

2021

@@ -189,7 +190,7 @@ def fetch_data():
189190
# maximum number of attempts
190191
n_tries = 5
191192
# retrieve the current date
192-
date = datetime.datetime.utcnow()
193+
date = datetime.datetime.now(datetime.timezone.utc)
193194
fetch_exception = None
194195
for i in range(0, n_tries):
195196
try:
@@ -360,6 +361,10 @@ def processData(gdb, existing_ids):
360361
INPUT gdb: fetched geodatabase with new data (geodatabase)
361362
RETURN all_ids: a list storing all the wdpa_pids in the current dataframe (list of strings)
362363
'''
364+
# retrieve the current date
365+
date = datetime.datetime.now(datetime.timezone.utc)
366+
date_str = date.strftime("%b%Y")
367+
363368
# whether we have reached the last slice
364369
last_slice = False
365370
# the index of the first row we want to import from the geodatabase
@@ -375,7 +380,7 @@ def processData(gdb, existing_ids):
375380
# deal with the large geometries first
376381
for i in range(0, 100000000):
377382
# import a slice of the geopandas dataframe
378-
gdf = gpd.read_file(gdb, driver='FileGDB', layer = 0, encoding='utf-8', rows = slice(start, end))
383+
gdf = gpd.read_file(gdb, driver='FileGDB', layer =f'WDPA_poly_{date_str}', encoding='utf-8', rows = slice(start, end), engine="fiona")
379384
if '555643543' in gdf['WDPA_PID'].to_list():
380385
# isolate the large polygon
381386
gdf_large = gdf.loc[gdf['WDPA_PID'] =='555643543']
@@ -394,6 +399,7 @@ def processData(gdb, existing_ids):
394399
end = start
395400
start -= step
396401

402+
# process WDPA_poly
397403
# the index of the first row we want to import from the geodatabase
398404
start = -100
399405
# the number of rows we want to fetch and process each time
@@ -402,7 +408,7 @@ def processData(gdb, existing_ids):
402408
end = None
403409
for i in range(0, 100000000):
404410
# import a slice of the geopandas dataframe
405-
gdf = gpd.read_file(gdb, driver='FileGDB', layer = 0, encoding='utf-8', rows = slice(start, end))
411+
gdf = gpd.read_file(gdb, driver='FileGDB', layer =f'WDPA_poly_{date_str}', encoding='utf-8', rows = slice(start, end), engine="fiona")
406412
# get rid of the \r\n in the wdpa_pid column
407413
gdf['WDPA_PID'] = [x.split('\r\n')[0] for x in gdf['WDPA_PID']]
408414
# create a new column to store the status_yr column as timestamps
@@ -445,7 +451,7 @@ def processData(gdb, existing_ids):
445451
start = 0
446452
last_slice = True
447453
else:
448-
# we've processed the whole dataframe
454+
# we've processed the whole poly dataframe
449455
break
450456

451457
return(all_ids)
@@ -458,7 +464,7 @@ def updateResourceWatch(num_new):
458464
# If there are new entries in the Carto table
459465
if num_new>0:
460466
# Update dataset's last update date on Resource Watch
461-
most_recent_date = datetime.datetime.utcnow()
467+
most_recent_date = datetime.datetime.now(datetime.timezone.utc)
462468
lastUpdateDate(DATASET_ID, most_recent_date)
463469

464470
# Update the dates on layer legends - TO BE ADDED IN FUTURE
@@ -472,9 +478,9 @@ def check_first_run(existing_ids):
472478
# get current last updated date
473479
dataLastUpdated = json.loads(r.content.decode('utf-8'))['data']['attributes']['dataLastUpdated']
474480
# Check if it's more than 10 days ago
475-
if datetime.datetime.utcnow() - datetime.datetime.strptime(dataLastUpdated, "%Y-%m-%dT%H:%M:%S.%fZ") > datetime.timedelta(days=10):
481+
if datetime.datetime.now(datetime.timezone.utc) - datetime.datetime.strptime(dataLastUpdated, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=datetime.timezone.utc) > datetime.timedelta(days=10):
476482
# update last update date
477-
lastUpdateDate(DATASET_ID, datetime.datetime.utcnow())
483+
lastUpdateDate(DATASET_ID, datetime.datetime.now(datetime.timezone.utc))
478484
# set CLEAR_TABLE_FIRST to True
479485
CLEAR_TABLE_FIRST = True
480486
# clear existing_ids

bio_007b_nrt_rw0_marine_protected_areas/Dockerfile

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
FROM python:3.9
1+
FROM python:3.10
22
LABEL maintainer="Weiqi Zhou <[email protected]>"
33
# Note this script was originally developed by Yujing Wu <[email protected]>
44

bio_007b_nrt_rw0_marine_protected_areas/contents/src/__init__.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -212,7 +212,7 @@ def fetch():
212212
'''
213213
# pull the data from the url
214214
n_tries = 5
215-
date = datetime.datetime.utcnow()
215+
date = datetime.datetime.now(datetime.timezone.utc)
216216
fetch_exception = None
217217
for i in range(0, n_tries):
218218
try:
@@ -355,7 +355,7 @@ def updateResourceWatch(num_new):
355355
# If there have been data uploaded to the Carto table
356356
if num_new > 0:
357357
# Update dataset's last update date on Resource Watch
358-
most_recent_date = datetime.datetime.utcnow()
358+
most_recent_date = datetime.datetime.now(datetime.timezone.utc)
359359
lastUpdateDate(DATASET_ID, most_recent_date)
360360

361361
def main():

0 commit comments

Comments
 (0)