15
15
import glob
16
16
import warnings
17
17
import json
18
+ import fiona
18
19
warnings .simplefilter (action = 'ignore' , category = UserWarning )
19
20
20
21
@@ -189,7 +190,7 @@ def fetch_data():
189
190
# maximum number of attempts
190
191
n_tries = 5
191
192
# retrieve the current date
192
- date = datetime .datetime .utcnow ( )
193
+ date = datetime .datetime .now ( datetime . timezone . utc )
193
194
fetch_exception = None
194
195
for i in range (0 , n_tries ):
195
196
try :
@@ -360,6 +361,10 @@ def processData(gdb, existing_ids):
360
361
INPUT gdb: fetched geodatabase with new data (geodatabase)
361
362
RETURN all_ids: a list storing all the wdpa_pids in the current dataframe (list of strings)
362
363
'''
364
+ # retrieve the current date
365
+ date = datetime .datetime .now (datetime .timezone .utc )
366
+ date_str = date .strftime ("%b%Y" )
367
+
363
368
# whether we have reached the last slice
364
369
last_slice = False
365
370
# the index of the first row we want to import from the geodatabase
@@ -375,7 +380,7 @@ def processData(gdb, existing_ids):
375
380
# deal with the large geometries first
376
381
for i in range (0 , 100000000 ):
377
382
# import a slice of the geopandas dataframe
378
- gdf = gpd .read_file (gdb , driver = 'FileGDB' , layer = 0 , encoding = 'utf-8' , rows = slice (start , end ))
383
+ gdf = gpd .read_file (gdb , driver = 'FileGDB' , layer = f'WDPA_poly_ { date_str } ' , encoding = 'utf-8' , rows = slice (start , end ), engine = "fiona" )
379
384
if '555643543' in gdf ['WDPA_PID' ].to_list ():
380
385
# isolate the large polygon
381
386
gdf_large = gdf .loc [gdf ['WDPA_PID' ] == '555643543' ]
@@ -394,6 +399,7 @@ def processData(gdb, existing_ids):
394
399
end = start
395
400
start -= step
396
401
402
+ # process WDPA_poly
397
403
# the index of the first row we want to import from the geodatabase
398
404
start = - 100
399
405
# the number of rows we want to fetch and process each time
@@ -402,7 +408,7 @@ def processData(gdb, existing_ids):
402
408
end = None
403
409
for i in range (0 , 100000000 ):
404
410
# import a slice of the geopandas dataframe
405
- gdf = gpd .read_file (gdb , driver = 'FileGDB' , layer = 0 , encoding = 'utf-8' , rows = slice (start , end ))
411
+ gdf = gpd .read_file (gdb , driver = 'FileGDB' , layer = f'WDPA_poly_ { date_str } ' , encoding = 'utf-8' , rows = slice (start , end ), engine = "fiona" )
406
412
# get rid of the \r\n in the wdpa_pid column
407
413
gdf ['WDPA_PID' ] = [x .split ('\r \n ' )[0 ] for x in gdf ['WDPA_PID' ]]
408
414
# create a new column to store the status_yr column as timestamps
@@ -445,7 +451,7 @@ def processData(gdb, existing_ids):
445
451
start = 0
446
452
last_slice = True
447
453
else :
448
- # we've processed the whole dataframe
454
+ # we've processed the whole poly dataframe
449
455
break
450
456
451
457
return (all_ids )
@@ -458,7 +464,7 @@ def updateResourceWatch(num_new):
458
464
# If there are new entries in the Carto table
459
465
if num_new > 0 :
460
466
# Update dataset's last update date on Resource Watch
461
- most_recent_date = datetime .datetime .utcnow ( )
467
+ most_recent_date = datetime .datetime .now ( datetime . timezone . utc )
462
468
lastUpdateDate (DATASET_ID , most_recent_date )
463
469
464
470
# Update the dates on layer legends - TO BE ADDED IN FUTURE
@@ -472,9 +478,9 @@ def check_first_run(existing_ids):
472
478
# get current last updated date
473
479
dataLastUpdated = json .loads (r .content .decode ('utf-8' ))['data' ]['attributes' ]['dataLastUpdated' ]
474
480
# Check if it's more than 10 days ago
475
- if datetime .datetime .utcnow ( ) - datetime .datetime .strptime (dataLastUpdated , "%Y-%m-%dT%H:%M:%S.%fZ" ) > datetime .timedelta (days = 10 ):
481
+ if datetime .datetime .now ( datetime . timezone . utc ) - datetime .datetime .strptime (dataLastUpdated , "%Y-%m-%dT%H:%M:%S.%fZ" ). replace ( tzinfo = datetime . timezone . utc ) > datetime .timedelta (days = 10 ):
476
482
# update last update date
477
- lastUpdateDate (DATASET_ID , datetime .datetime .utcnow ( ))
483
+ lastUpdateDate (DATASET_ID , datetime .datetime .now ( datetime . timezone . utc ))
478
484
# set CLEAR_TABLE_FIRST to True
479
485
CLEAR_TABLE_FIRST = True
480
486
# clear existing_ids
0 commit comments