-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Oembed config - oembed_endpoints (#2752) #10536
Changes from all commits
6674b26
b1dfb90
323db7a
1505a02
9087812
cf4fb94
dee644b
83203e1
1c852cc
27112d7
63f132e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Add an `oembed_providers_dir` option to `homeserver.yaml`. It points to | ||
a directory of oEmbed provider JSON files in the same format as https://oembed.com/providers.json. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,9 @@ | ||
matrix-synapse-py3 (1.42.0~rc1) UNRELEASED; urgency=medium | ||
|
||
* Add the /etc/matrix-synapse/oembed directory for providers.json and custom oEmbed provider JSON files. | ||
|
||
-- Srdjan <[email protected]> Fri, 20 Aug 2021 12:49:08 +1000 | ||
|
||
matrix-synapse-py3 (1.41.0) stable; urgency=medium | ||
|
||
* New synapse release 1.41.0. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
debian/manage_debconf.pl /opt/venvs/matrix-synapse/lib/ | ||
synapse/res/oembed/providers.json etc/matrix-synapse/oembed |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,182 @@ | ||
# Copyright 2021 The Matrix.org Foundation C.I.C. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
import json
import logging
import re
from os import listdir, path
from typing import Any, Dict, List
from urllib.parse import urlparse, urlunparse

from synapse.config._base import Config, ConfigError
from synapse.config._util import validate_config
|
||
# Use getLogger() so this logger is registered in the logging hierarchy and
# inherits the handlers and levels configured for the application; a directly
# instantiated logging.Logger has no parent and bypasses that configuration.
logger = logging.getLogger(__name__)
|
||
|
||
class OembedConfig(Config):
    """Configuration of oEmbed provider endpoints.

    ``oembed_endpoints`` maps a provider domain (the host of the provider's
    ``provider_url`` with a leading ``www.`` removed) to a list of endpoint
    descriptions, each of the form::

        {
            "url": preview_or_discovery_url,
            "discovery": bool,
            "formats": [...],              # only if the provider lists formats
            "patterns": [url_pattern...],  # only if the provider lists schemes
        }
    """

    section = "oembed"
    # NOTE(review): this dict is defined on the class, so it is shared by
    # every instance that does not rebind the attribute on itself.
    oembed_endpoints: Dict[str, List[Dict[str, Any]]] = {}
|
||
def read_config(self, config, **kwargs):
    """Populate ``self.oembed_endpoints`` from the oEmbed provider files.

    Every ``*.json`` file directly inside the directory named by the
    ``oembed_providers_dir`` option is loaded and validated against
    ``_OEMBED_SCHEMA`` on its own. A file that cannot be read, is not valid
    JSON, or does not match the schema is skipped with a warning that names
    the file, so one broken file neither aborts loading nor discards the
    providers found in the other files.

    Args:
        config: Mapping of configuration options; only
            ``oembed_providers_dir`` is read.
        **kwargs: Not used by this method.
    """

    def _scheme_to_pattern(scheme_glob):
        """Compile one oEmbed URL scheme glob into a regular expression."""
        parts = urlparse(scheme_glob)
        if not parts.scheme or not parts.netloc:
            # Not an absolute URL: treat the whole string as a plain glob.
            return re.compile(re.escape(scheme_glob).replace("\\*", ".+"))

        # A "*" in the host may only stand for characters of a single DNS
        # label, so it can never swallow a "." or a "/".
        regex = (
            re.escape(parts.scheme)
            + "://"
            + re.escape(parts.netloc).replace("\\*", "[a-zA-Z0-9_-]+")
        )
        # Re-attach the remaining components with their separators escaped:
        # an unescaped "?" would act as a regex quantifier instead of
        # matching the literal query-string delimiter.
        for separator, component in (
            ("", parts.path),
            (";", parts.params),
            ("?", parts.query),
            ("#", parts.fragment),
        ):
            if component:
                regex += re.escape(separator) + re.escape(component).replace(
                    "\\*", ".+"
                )
        return re.compile(regex)

    # Rebind on the instance so that the class-level dict is never mutated
    # (it would otherwise be shared between all instances).
    self.oembed_endpoints = {}

    oembed_dir = config.get("oembed_providers_dir")
    if not oembed_dir:
        return

    try:
        # Sorted so that, when two files describe the same provider domain,
        # which one wins does not depend on directory listing order.
        fnames = sorted(listdir(oembed_dir))
    except OSError as e:
        logger.warning(
            "Cannot read oembed_providers_dir %r, no oEmbed providers loaded: %s",
            oembed_dir,
            e,
        )
        return

    for fname in fnames:
        if not fname.endswith(".json"):
            continue
        fpath = path.join(oembed_dir, fname)

        try:
            with open(fpath, encoding="utf-8") as f:
                providers = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both malformed JSON and undecodable bytes.
            logger.warning("Skipping oEmbed providers file %r: %s", fpath, e)
            continue

        try:
            # Validate each file separately so the warning can name it.
            validate_config(_OEMBED_SCHEMA, providers, (fpath,))
        except ConfigError as e:
            logger.warning(
                "Skipping oEmbed providers file %r, unexpected format: %s",
                fpath,
                e,
            )
            continue

        for provider in providers:
            parsed = urlparse(provider["provider_url"].rstrip("/"))
            domain = re.sub(r"^www\.", "", parsed.netloc)
            if not domain:
                logger.warning(
                    "Skipping oEmbed provider %r in %r: provider_url %r has no host",
                    provider["provider_name"],
                    fpath,
                    provider["provider_url"],
                )
                continue

            endpoints = []
            for endpoint in provider["endpoints"]:
                entry = {
                    "url": endpoint["url"],
                    "discovery": endpoint.get("discovery", False),
                }
                if "formats" in endpoint:
                    entry["formats"] = endpoint["formats"]
                if "schemes" in endpoint:
                    entry["patterns"] = [
                        _scheme_to_pattern(scheme) for scheme in endpoint["schemes"]
                    ]
                endpoints.append(entry)

            self.oembed_endpoints[domain] = endpoints
|
||
def get_oembed_endpoint(self, url):
    """Return the oEmbed endpoint URL to use for ``url``, if any.

    A URL is matched against the compiled ``patterns`` of every configured
    endpoint; each pattern has to match the complete URL. Providers whose
    own domain is the URL's host (or a parent domain of it) are tried
    first, but all other providers are consulted as well, because a
    provider may publish schemes for hosts other than its own domain.

    Args:
        url: The URL to check.

    Returns:
        The oEmbed endpoint URL to use, or None if no pattern matches.
    """
    host = urlparse(url).hostname or ""

    def _is_provider_host(domain):
        # Compare whole host names; a plain substring test would also
        # accept unrelated hosts that merely contain the domain.
        domain = domain.lower()
        return host == domain or host.endswith("." + domain)

    # sorted() is stable: providers matching the host move to the front and
    # everything else keeps its configured order.
    providers = sorted(
        self.oembed_endpoints.items(),
        key=lambda item: not _is_provider_host(item[0]),
    )

    for _domain, endpoints in providers:
        for endpoint in endpoints:
            # TODO: endpoints that only support discovery (and therefore
            # have no patterns) are not handled yet.
            for pattern in endpoint.get("patterns", ()):
                if pattern.fullmatch(url):
                    return endpoint["url"]

    return None
|
||
def generate_config_section(self, config_dir_path, server_name, **kwargs):
    """Return the sample ``homeserver.yaml`` text for the oEmbed section.

    The ``oembed_providers_dir`` setting is emitted commented out: the
    example path is only populated where something has installed it (the
    text itself notes that the Deb package does), so a freshly generated
    config must not point at it by default.

    Args:
        config_dir_path: Not used by this section.
        server_name: Not used by this section.
        **kwargs: Not used by this section.

    Returns:
        The sample configuration text for this section.
    """
    return """\
    ## Oembed ##

    # Providers json files directory.
    # oembed directory contains https://oembed.com/providers.json file
    # that you can place in oembed_providers_dir. Deb package does that for you.
    # Other custom providers can be added in other .json files, following the
    # same json format.
    #
    #oembed_providers_dir: /etc/matrix-synapse/oembed/
    """
|
||
|
||
# Reusable JSON-schema fragments.
_STRING = {"type": "string"}
_BOOL = {"type": "boolean"}
_ARRAY_OF_STRINGS = {"type": "array", "items": _STRING}

# One entry of a provider's "endpoints" list; only "url" is mandatory.
_OEMBED_ENDPOINT_SCHEMA = {
    "type": "object",
    "required": ["url"],
    "properties": {
        "url": _STRING,
        "schemes": _ARRAY_OF_STRINGS,
        "discovery": _BOOL,
        "formats": _ARRAY_OF_STRINGS,
    },
}

# One provider object as found in a providers JSON file.
_OEMBED_PROVIDER_SCHEMA = {
    "type": "object",
    "required": ["provider_name", "provider_url", "endpoints"],
    "properties": {
        "provider_name": _STRING,
        "provider_url": _STRING,
        "endpoints": {"type": "array", "items": _OEMBED_ENDPOINT_SCHEMA},
    },
}

# A providers file is a JSON array of provider objects.
_OEMBED_SCHEMA = {"type": "array", "items": _OEMBED_PROVIDER_SCHEMA}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should also consider people who do not use the Debian package and instead install from source.
The default behaviour should be to use the providers file embedded in the package. You can use
`pkg_resources.resource_filename`
or similar for that (see https://setuptools.readthedocs.io/en/latest/pkg_resources.html#basic-resource-access, and also `synapse.config._base.Config`
for an example).