-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaudobon.py
91 lines (61 loc) · 2.16 KB
/
audobon.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import urllib2
import csv,codecs,cStringIO
import re
from bs4 import BeautifulSoup
class UnicodeWriter:
    '''CSV writer that sends rows of text to a byte stream in a chosen encoding.

    Each row is rendered by ``csv.writer`` into an in-memory buffer, then
    re-encoded with an incremental encoder and written to ``f``.  With the
    default ``utf-8-sig`` encoding the incremental encoder emits the BOM
    once, in front of the first row only.

    f        -- binary file-like object with a ``write`` method
    dialect  -- csv dialect handed to ``csv.writer``
    encoding -- name of the output encoding
    kwds     -- extra formatting options forwarded to ``csv.writer``
    '''

    # True on Python 2, where ``str`` is the byte-string type and the csv
    # module reads/writes bytes; False on Python 3, where csv works on text.
    _BYTES_CSV = str is bytes

    def __init__(self, f, dialect=csv.excel, encoding="utf-8-sig", **kwds):
        # Imported locally so the class picks the right buffer type itself:
        # cStringIO only exists on Python 2, io.StringIO is the text buffer
        # the Python 3 csv module needs.
        try:
            from cStringIO import StringIO
        except ImportError:
            from io import StringIO
        self.queue = StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def _encode_cell(self, value):
        '''Return ``value`` in the form the underlying csv writer expects.

        ``None`` becomes an empty cell, byte strings are interpreted as
        UTF-8 (UnicodeDecodeError if they are not), and any other non-text
        object is converted with the text type's constructor.
        '''
        text_type = type(u"")
        if value is None:
            text = u""
        elif isinstance(value, bytes):
            text = value.decode("utf-8")
        elif isinstance(value, text_type):
            text = value
        else:
            text = text_type(value)
        return text.encode("utf-8") if self._BYTES_CSV else text

    def writerow(self, row):
        '''writerow(sequence of cells) -> None

        Format one row as CSV, encode it and write it to the output stream.
        '''
        self.writer.writerow([self._encode_cell(cell) for cell in row])
        data = self.queue.getvalue()
        if isinstance(data, bytes):
            # Python 2: the csv module produced UTF-8 bytes; go back to text
            # so the row can be re-encoded into the target encoding.
            data = data.decode("utf-8")
        self.stream.write(self.encoder.encode(data))
        # Empty the buffer for the next row.  The explicit seek is required
        # for io.StringIO, whose truncate() does not move the position.
        self.queue.seek(0)
        self.queue.truncate(0)

    def writerows(self, rows):
        '''writerows(iterable of rows) -> None

        Write every row in ``rows`` with writerow().
        '''
        for row in rows:
            self.writerow(row)
# Scrape the Atlanta Audubon field-trip calendar and export its events to a
# CSV file.  The event list is read from the inline JavaScript
# ("events: [...]") and each event's description from the matching
# "event_desc_<id>" element of the same page.

url = "https://www.atlantaaudubon.org/field-trips"
page = urllib2.urlopen(url)
try:
    raw = page.read()
finally:
    # Release the HTTP connection even if the read fails.
    page.close()

soup = BeautifulSoup(raw, 'html.parser')

# Run the regexes over decoded text so the captured ids/titles/dates are
# unicode; byte-string captures with non-ASCII characters cannot be written
# by a writer that calls .encode() on every cell.
page_text = raw.decode(soup.original_encoding or "utf-8", "replace")

reg = re.compile(r'events: (\[.+?)}\);', re.MULTILINE | re.DOTALL)
rawjs = reg.findall(page_text)
if not rawjs:
    # Fail with an explicit message instead of an IndexError on rawjs[0].
    raise RuntimeError("No 'events: [...]' calendar data found at " + url)

idreg = re.compile(r"id:'(.+?)', title:'(.+?)', start:'(.+?)'", re.MULTILINE)
ids = idreg.findall(rawjs[0])

filename = "audobon-events.csv"

# Columns that this scraper cannot fill in are exported as empty cells.
organization = "Atlanta Audubon Society"
category = ""
endDate = ""
endTime = ""
cost = ""
rsvp = ""
ageGroup = ""
dogFriendly = ""
indoorOutdoor = ""
imported = "No"

with open(filename, "wb") as f:
    writer = UnicodeWriter(f, quoting=csv.QUOTE_ALL)
    writer.writerow(['Organization/organizer', 'Title', 'Description (optional)', 'URL of event', 'Location', 'Category',
                     'Start Date', 'End Date', 'Start Time', 'End Time', 'Cost', 'RSVP info', 'Age group', 'Dog-friendly',
                     'Indoor or outdoor', 'Imported to Google Calendar'])

    for event_id, title, start in ids:
        location = ""
        desc = ""
        container = soup.find(id="event_desc_" + event_id)
        if container is not None:
            links = container.find_all('a')
            if len(links) > 1:
                # NOTE(review): this stores the second <a> element itself, so
                # the cell appears to receive its markup rather than only the
                # link text -- confirm whether links[1].get_text() was meant.
                location = links[1]
            text_div = container.find("div", class_="calendar-event-text")
            if text_div is not None:
                desc = text_div.get_text()

        # "start" is split on whitespace into a date and a time; tolerate
        # values that carry no time part.
        start_parts = start.split()
        startDate = start_parts[0] if start_parts else ""
        startTime = start_parts[1] if len(start_parts) > 1 else ""

        writer.writerow([organization, title, desc, url, location, category, startDate, endDate,
                         startTime, endTime, cost, rsvp, ageGroup, dogFriendly, indoorOutdoor, imported])

print('Wrote output to ' + filename)