-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathapp.py
executable file
·340 lines (243 loc) · 11.6 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
#!/usr/bin/env python3
from configparser import ConfigParser
from configparser import SectionProxy # type: ignore
from datetime import datetime
import hashlib
import hmac
from io import BytesIO
from ipaddress import IPv4Address, IPv6Address, IPv4Network, IPv6Network, ip_address, ip_network
import json
import logging
from logging import DEBUG, INFO
import os
from os import environ, path
import re
from urllib.error import URLError
from urllib.request import urlopen
import bottle
from bottle import BaseResponse, Bottle, HTTPError, abort, post, request, response
from cachetools import Cache, LRUCache
from funcy import cache, cut_prefix, keep, memoize, partial as par, re_find
from github import Github
from github.Commit import Commit
from github.GithubException import BadCredentialsException, GithubException, TwoFactorException
from github.PullRequest import PullRequest
from github.Repository import Repository
# type hints per PEP 484
from typing import Any, Generator, Iterable as Iter, List, MutableMapping, NamedTuple, Tuple, Union
# Git author identity used for matching commits: (name, email, date).
AuthorTuple = Tuple[str, str, datetime]
# Either flavour of address / network produced by the ``ipaddress`` module.
IPAddress = Union[IPv4Address, IPv6Address]
IPNetwork = Union[IPv4Network, IPv6Network]
# Base URL of the GitHub API; used for the PyGithub client and the ``/meta`` lookup.
GH_BASE_URL = 'https://api.github.com'
# Directory containing this file; ``settings.ini`` is looked up here.
BASE_DIR = path.dirname(__file__)
LOG = logging.getLogger(__name__)
# Reported in the ping response and in the startup log message.
VERSION = '0.4.0'
# A cached value paired with the timestamp it was stored for.
CacheItem = NamedTuple('CacheItem', [('value', Any), ('updated_at', datetime)])
# Extracts the branch name from a git ref such as ``refs/heads/<branch>``;
# callers treat a falsy result as "not a branch ref".
ref_head_name = par(re_find, r'refs/heads/(.*)')
@post('/')
def post_index():
    """Entry point for webhook deliveries (``POST /``).

    Rejects the request with 403 when it does not originate from one of
    GitHub's networks and with 415 when the body is not declared as
    ``application/json``. Otherwise the event named in the
    ``X-GitHub-Event`` header is dispatched to its handler; any event other
    than ``ping`` and ``push`` is answered with 400.
    """
    event_type = request.get_header('X-GitHub-Event')

    if not is_request_from_github():
        abort(403, "Forbidden for IP %s, it's not GitHub's address" % remote_ip())

    # Ignore media type parameters such as "; charset=utf-8".
    media_type = request.content_type.split(';')[0]
    if media_type != 'application/json':
        abort(415, "Expected application/json, but got %s" % request.content_type)

    handlers = {'ping': handle_ping, 'push': handle_push}
    handler = handlers.get(event_type)
    if handler is None:
        abort(400, "Unsupported event type: %s" % event_type)
    return handler()
def handle_ping():
    """Answer a ``ping`` event with a message naming this service and its version."""
    greeting = "github-pr-closer %s" % VERSION
    return dict(msg=greeting)
def handle_push():
    """Handle a ``push`` event by closing pull requests merged by the pushed commits.

    Steps:

    1. Read ``repository.full_name`` from the JSON payload (422 if absent).
    2. Look up the repository's configuration section (400 if unknown).
    3. Verify the ``X-Hub-Signature`` header against the configured
       ``hook_secret``.
    4. Skip the push unless its branch fully matches ``branch_regex``
       (default ``master``).
    5. Close every open pull request matched by :func:`find_matching_pulls`
       with a comment generated by :func:`gen_comment`.

    Returns a dict with a ``msg`` key describing the outcome. GitHub
    authentication failures are reported as 500, other GitHub API errors
    as 503.
    """
    payload = request.json

    try:
        repo_slug = payload['repository']['full_name']
    except (KeyError, TypeError):
        # TypeError covers an empty body (``request.json`` is None) and
        # payloads where the top level or "repository" is not an object.
        abort(422, 'Invalid JSON payload: repository.full_name is missing')

    try:
        conf = repo_config(repo_slug)
    except (KeyError, ValueError):
        # repo_config() signals a missing section with ValueError; without
        # catching it an unknown repository would surface as a 500.
        abort(400, "Unknown repository: %s" % repo_slug)

    LOG.info("Handling push from repository: %s", repo_slug)

    verify_signature(conf.get('hook_secret', ''),
                     request.get_header('X-Hub-Signature'),
                     request.body)

    branch = ref_head_name(payload.get('ref', ''))
    # fullmatch() anchors the whole pattern, so alternations such as
    # "master|develop" are anchored on both sides (unlike "^master|develop$").
    if not branch or not re.fullmatch(conf.get('branch_regex', 'master'), branch):
        return ok("Skipping push into branch: %s" % (branch or '<unknown>'))

    closed_pullreqs = []
    try:
        repo = Github(conf.get('github_token'), base_url=GH_BASE_URL).get_repo(repo_slug)
        # Lazy on purpose: commits are fetched only when they are consumed.
        pushed_commits = (repo.get_commit(c['id'])
                          for c in payload.get('commits', []))

        for pullreq, merged_commits in find_matching_pulls(repo, pushed_commits):
            pullreq_id = "%s#%s" % (repo_slug, pullreq.number)
            LOG.debug("Closing pull request %s", pullreq_id)
            close_pullreq_with_comment(pullreq, gen_comment(repo_slug, merged_commits))
            closed_pullreqs.append(pullreq_id)

    except (BadCredentialsException, TwoFactorException) as e:
        abort(500, "Authentication error, GitHub returned: %s" % e)
    except GithubException as e:
        abort(503, str(e))

    if closed_pullreqs:
        return ok("Closed pull requests: %s" % ', '.join(closed_pullreqs))
    return ok('No pull request has been closed')
def default_error_handler(resp: BaseResponse):
    """Render an error response as an ``application/problem+json`` document.

    The error body (plus the causing exception, if any) is logged at ERROR
    level; for 5xx responses carrying a traceback, the traceback is logged
    at DEBUG level. Returns a JSON string with ``title`` and ``status``.
    """
    response.content_type = 'application/problem+json'

    cause = getattr(resp, 'exception', None)
    if cause:
        msg = "%s, caused by: %s" % (resp.body, cause)
    else:
        msg = resp.body
    LOG.error(msg)

    if resp.status_code >= 500:
        trace = getattr(resp, 'traceback', None)
        if trace:
            LOG.debug(trace)

    problem = {'title': resp.body, 'status': resp.status_code}
    return json.dumps(problem)
def ok(message: str) -> dict:
    """Log ``message`` at INFO level and return it wrapped as ``{'msg': message}``."""
    LOG.info(message)
    return dict(msg=message)
def is_request_from_github() -> bool:
    """Return True if the current request comes from GitHub.

    The client address reported by :func:`remote_ip` is tested for
    membership in each network returned by :func:`github_source_networks`.
    """
    networks = github_source_networks()
    # Resolve and parse the client address once rather than once per network.
    client_ip = remote_ip()
    return any(client_ip in net for net in networks)
def remote_ip() -> IPAddress:
    """Return request's IP address (i.e. address of the client).

    The address is taken from the ``X-Forwarded-For`` header when present,
    otherwise from ``REMOTE_ADDR``. Raises ``ValueError`` if the resulting
    string is not a valid IP address.
    """
    forwarded_for = request.environ.get('HTTP_X_FORWARDED_FOR')
    if forwarded_for:
        # The header may hold a comma-separated chain ("client, proxy1, ...");
        # passing the whole string to ip_address() would raise ValueError.
        # Use the right-most entry: assuming the fronting proxy appends the
        # peer address (e.g. nginx's $proxy_add_x_forwarded_for), entries to
        # its left are client-supplied and must not decide access control.
        addr = forwarded_for.split(',')[-1].strip()
    else:
        addr = request.environ.get('REMOTE_ADDR')
    # nginx uses ::ffff: as a prefix for IPv4 addresses in ipv6only=off mode.
    return ip_address(cut_prefix(addr, '::ffff:'))
@cache(timeout=86400)
def github_source_networks() -> List[IPNetwork]:
    """Return GitHub's IP addresses that may be used for delivering webhook events.

    The list is read from the ``hooks`` key of GitHub's ``/meta`` endpoint
    and the result is cached via ``@cache(timeout=86400)``.

    Raises :class:`GithubResponseError` when the request fails or the
    response cannot be parsed.
    """
    try:
        LOG.debug('Fetching GitHub /meta')
        # Close the HTTP response deterministically instead of leaving the
        # connection open until garbage collection.
        with urlopen("%s/meta" % GH_BASE_URL, timeout=5) as resp:
            data = json.loads(resp.read().decode('utf-8'))
        return [ip_network(net) for net in data['hooks']]
    except (URLError, ValueError, KeyError) as e:
        raise GithubResponseError('Failed to fetch list of allowed IP addresses from GitHub', e)
def verify_signature(secret: str, signature: str, resp_body: BytesIO) -> None:
    """Verify HMAC-SHA1 signature of the given response body.

    The signature is expected to be in format ``sha1=<hex-digest>``; it is
    compared case-insensitively.

    :param secret: shared secret used as the HMAC key (UTF-8 encoded).
    :param signature: value of the signature header; may be ``None``.
    :param resp_body: in-memory buffer holding the raw body that was signed.
    :raises InvalidSignatureError: if the signature is missing, malformed,
        uses an algorithm other than ``sha1``, or does not match the body.
    """
    try:
        alg, digest = signature.lower().split('=', 1)
    except (ValueError, AttributeError):
        # ValueError: no "=" separator; AttributeError: header is absent (None).
        raise InvalidSignatureError('signature is malformed')

    if alg != 'sha1':
        raise InvalidSignatureError("expected type sha1, but got %s" % alg)

    computed_digest = hmac.new(secret.encode('utf-8'),  # type: ignore
                               msg=resp_body.getbuffer(),
                               digestmod=hashlib.sha1).hexdigest()

    # Compare as bytes: compare_digest() raises TypeError for str arguments
    # containing non-ASCII characters, and the header value is untrusted.
    if not hmac.compare_digest(computed_digest.encode('utf-8'),
                               digest.encode('utf-8', 'replace')):
        raise InvalidSignatureError('digests do not match')
def find_matching_pulls(gh_repo: Repository, commits: Iter[Commit]) -> Generator:
    """Yield open pull requests whose content was merged by the given ``commits``.

    Each yielded item is a tuple of the :class:`PullRequest` and the list of
    matched :class:`Commit`s (a subset of the given ``commits``).

    Commits are compared by their git *author* triplet (name, email, date).
    A pull request matches when at least one of its commits has the same
    author triplet as one of the given ``commits``, and the set of filenames
    touched by those matching commits equals the set of filenames changed by
    the pull request as a whole.

    A matched pull request is removed from the shared cache before it is
    yielded.
    """
    LOG.debug('Fetching commits referenced in payload')
    pushed_by_author = {}
    for pushed in commits:
        pushed_by_author[commit_git_author(pushed)] = pushed

    cache = shared_cache()

    for pullreq in gh_repo.get_pulls(state='open'):
        LOG.debug("Checking pull request #%s", pullreq.number)

        authors = pullreq_commits_authors(pullreq, cache)
        merged_commits = [c for c in map(pushed_by_author.get, authors) if c]
        if not merged_commits:
            # Nothing matched; avoid fetching the file lists at all.
            continue

        merged_files = {f.filename for c in merged_commits for f in c.files}
        pullreq_files = {f.filename for f in pullreq.get_files()}
        if merged_files != pullreq_files:
            continue

        del cache[pullreq.id]
        yield pullreq, merged_commits

    LOG.debug("Cached items: %d, max size: %d" % (cache.currsize, cache.maxsize))
def commit_git_author(commit: Commit) -> AuthorTuple:
    """Return the git *author* of ``commit`` as a (name, email, date) triple."""
    author = commit.commit.author
    return author.name, author.email, author.date
def pullreq_commits_authors(pullreq: PullRequest,
                            cache: MutableMapping[int, CacheItem]) -> List[AuthorTuple]:
    """Return git authors of all commits contained in the given pull request.

    Results are stored in ``cache`` under the pull request's id. A cached
    entry is reused as long as it is not older than the pull request's
    ``updated_at``; otherwise the commits are loaded and the entry replaced.
    """
    entry = cache.get(pullreq.id)
    if entry and entry.updated_at >= pullreq.updated_at:
        LOG.debug("Using cached data for pull request #%s", pullreq.number)
        return entry.value

    LOG.debug("Loading commits for pull request #%s", pullreq.number)
    authors = list(map(commit_git_author, pullreq.get_commits()))
    cache[pullreq.id] = CacheItem(authors, pullreq.updated_at)
    return authors
def gen_comment(repo_slug: str, commits: List[Commit]) -> str:
    """Build the closing comment for a pull request of the given repository.

    The template comes from the ``close_comment`` key of the repository's
    config section and may contain these replacement fields:

    committer
        GitHub login (prefixed with ``@``) of the committer of the first of
        the given ``commits``, or the committer's plain name when no login
        is available.

    commits
        Comma-separated list of the SHA hashes of ``commits``.
    """
    template = repo_config(repo_slug)['close_comment']
    first = commits[0]

    # Fall back to the plain git committer name when the commit has no
    # GitHub user attached to it.
    try:
        committer = "@%s" % first.committer.login
    except AttributeError:
        committer = first.commit.committer.name

    shas = ', '.join(c.sha for c in commits)
    return template.format(committer=committer, commits=shas)
def close_pullreq_with_comment(pullreq: PullRequest, comment: str) -> None:
    """Post ``comment`` on the pull request and then set its state to closed."""
    # The comment is created first, so it is present by the time the pull
    # request is closed.
    pullreq.create_issue_comment(comment)
    pullreq.edit(state='closed')
@memoize
def shared_cache() -> Cache:
    """Return the LRU cache shared by all requests.

    Its capacity is read from ``cache_maxsize`` in the ``DEFAULT`` config
    section, falling back to 250.
    """
    defaults = config()['DEFAULT']
    return LRUCache(maxsize=defaults.getint('cache_maxsize', 250))
@memoize
def repo_config(repo_slug: str) -> SectionProxy:
    """Return the configuration section for the given repository slug.

    Section names are treated as regular expressions; the first section
    whose name matches the start of ``repo_slug`` wins.

    :raises ValueError: if no section matches.
    """
    settings = config()
    for section in settings.sections():
        if re.match(section, repo_slug):
            return settings[section]
    raise ValueError("No configuration section for repository %s found" % repo_slug)
@memoize
def config() -> ConfigParser:
    """Load settings from the configuration file(s).

    Two locations are tried: ``settings.ini`` next to this file and the
    path given by the environment variable ``CONF_FILE``.

    :raises FileNotFoundError: if neither file could be read.
    """
    candidates = [path.join(BASE_DIR, 'settings.ini'), os.getenv('CONF_FILE', '')]
    parser = ConfigParser()
    loaded = parser.read(candidates)
    if not loaded:
        raise FileNotFoundError('No configuration file was found.')
    return parser
class InvalidSignatureError(HTTPError):
    """HTTP 403 error raised when the ``X-Hub-Signature`` check fails."""

    def __init__(self, message: str, **kwargs) -> None:
        super().__init__(status=403,
                         body="Invalid X-Hub-Signature: %s" % message,
                         **kwargs)
class GithubResponseError(HTTPError):
    """HTTP 503 error wrapping a failure to obtain a usable response from GitHub."""

    def __init__(self, message: str, exception: Exception, **kwargs) -> None:
        # The body shows both the high-level message and the underlying cause.
        body = "%s: %s" % (message, exception)
        super().__init__(status=503, body=body, exception=exception, **kwargs)
# Monkey-patch bottle so that every error response is rendered by
# default_error_handler() above.
Bottle.default_error_handler = lambda _, resp: default_error_handler(resp)  # type: ignore

# Set up logging; the DEBUG environment variable switches on debug output.
logging.basicConfig(format="%(levelname)s: %(message)s")
LOG.setLevel(DEBUG if environ.get('DEBUG') else INFO)
LOG.info("Starting github-pr-closer %s", VERSION)

# Fail fast when config file is not found.
try:
    config()
except FileNotFoundError as e:
    LOG.critical("%s", e)
    # The ``exit`` builtin is injected by the ``site`` module and is not
    # guaranteed to exist (e.g. with ``python -S`` or embedded interpreters);
    # raising SystemExit is the portable equivalent.
    raise SystemExit(1)

# Run bottle internal server when invoked directly (mainly for development).
if __name__ == '__main__':
    bottle.run(host=environ.get('HTTP_HOST', '127.0.0.1'),
               port=environ.get('HTTP_PORT', '8080'))
# Run bottle in application mode (in production under uWSGI server).
else:
    application = bottle.default_app()