Skip to content

Commit c88c66b

Browse files
author
jschaff
committed
Merge remote-tracking branch 'origin' into add-sentry-support
2 parents 1eba386 + a903e1b commit c88c66b

16 files changed

+502
-464
lines changed

biothings_annotator/annotator/annotator.py

+34-27
Original file line numberDiff line numberDiff line change
@@ -9,50 +9,52 @@
99

1010
import biothings_client
1111

12-
from .exceptions import InvalidCurieError, TRAPIInputError
13-
from .settings import ANNOTATOR_CLIENTS, SERVICE_PROVIDER_API_HOST
14-
from .transformer import ResponseTransformer
15-
from .utils import batched, get_client, get_dotfield_value, group_by_subfield, parse_curie
12+
from biothings_annotator.annotator.exceptions import InvalidCurieError, TRAPIInputError
13+
from biothings_annotator.annotator.settings import ANNOTATOR_CLIENTS, SERVICE_PROVIDER_API_HOST
14+
from biothings_annotator.annotator.transformer import ResponseTransformer, load_atc_cache
15+
from biothings_annotator.annotator.utils import batched, get_client, get_dotfield_value, group_by_subfield, parse_curie
1616

1717
logger = logging.getLogger(__name__)
1818

1919

2020
class Annotator:
21-
2221
def __init__(self):
2322
self.api_host = os.environ.get("SERVICE_PROVIDER_API_HOST", SERVICE_PROVIDER_API_HOST)
2423

25-
def query_biothings(
24+
async def query_biothings(
2625
self, node_type: str, query_list: List[str], fields: Optional[Union[str, List[str]]] = None
2726
) -> Dict:
2827
"""
2928
Query biothings client based on node_type for a list of ids
3029
"""
3130
client = get_client(node_type, self.api_host)
32-
if not isinstance(client, biothings_client.BiothingClient):
31+
if not isinstance(client, biothings_client.AsyncBiothingClient):
3332
logger.error("Failed to get the biothings client for %s type. This type is skipped.", node_type)
3433
return {}
3534

3635
fields = fields or ANNOTATOR_CLIENTS[node_type]["fields"]
3736
scopes = ANNOTATOR_CLIENTS[node_type]["scopes"]
3837
logger.info("Querying annotations for %s %ss...", len(query_list), node_type)
39-
res = client.querymany(query_list, scopes=scopes, fields=fields)
38+
res = await client.querymany(query_list, scopes=scopes, fields=fields)
4039
logger.info("Done. %s annotation objects returned.", len(res))
4140
grouped_response = group_by_subfield(collection=res, search_key="query")
4241
return grouped_response
4342

44-
def transform(self, res_by_id, node_type):
43+
async def transform(self, res_by_id: Dict, node_type: str):
4544
"""
4645
perform any transformation on the annotation object, but in-place also returned object
4746
res_by_id is the output of query_biothings, node_type is the same passed to query_biothings
4847
"""
4948
logger.info("Transforming output annotations for %s %ss...", len(res_by_id), node_type)
50-
transformer = ResponseTransformer(res_by_id, node_type)
49+
atc_cache = await load_atc_cache(self.api_host)
50+
transformer = ResponseTransformer(res_by_id, node_type, self.api_host, atc_cache)
5151
transformer.transform()
5252
logger.info("Done.")
5353
return res_by_id
5454

55-
def append_extra_annotations(self, node_d: Dict, node_id_subset: Optional[List[str]] = None, batch_n: int = 1000):
55+
async def append_extra_annotations(
56+
self, node_d: Dict, node_id_subset: Optional[List[str]] = None, batch_n: int = 1000
57+
):
5658
"""
5759
Append extra annotations to the existing node_d
5860
"""
@@ -61,7 +63,7 @@ def append_extra_annotations(self, node_d: Dict, node_id_subset: Optional[List[s
6163
cnt = 0
6264
logger.info("Retrieving extra annotations...")
6365
for node_id_batch in batched(node_id_list, batch_n):
64-
extra_res = extra_api.querymany(node_id_batch, scopes="_id", fields="all")
66+
extra_res = await extra_api.querymany(node_id_batch, scopes="_id", fields="all")
6567
for hit in extra_res:
6668
if hit.get("notfound", False):
6769
continue
@@ -83,7 +85,7 @@ def append_extra_annotations(self, node_d: Dict, node_id_subset: Optional[List[s
8385
cnt += 1
8486
logger.info("Done. %s extra annotations appended.", cnt)
8587

86-
def annotate_curie(
88+
async def annotate_curie(
8789
self, curie: str, raw: bool = False, fields: Optional[Union[str, List[str]]] = None, include_extra: bool = True
8890
) -> Dict:
8991
"""
@@ -92,18 +94,21 @@ def annotate_curie(
9294
node_type, _id = parse_curie(curie)
9395
if not node_type:
9496
raise InvalidCurieError(curie)
95-
res = self.query_biothings(node_type, [_id], fields=fields)
97+
res = await self.query_biothings(node_type, [_id], fields=fields)
9698
if not raw:
97-
res = self.transform(res, node_type)
99+
res = await self.transform(res, node_type)
98100
# res = [self.transform(r) for r in res[_id]]
99101
if res and include_extra:
100-
self.append_extra_annotations(res)
101-
return {curie: res.get(_id, {})}
102+
await self.append_extra_annotations(res)
103+
104+
curie_annotation = {curie: res.get(_id, {})}
105+
return curie_annotation
102106

103-
def _annotate_node_list_by_type(
107+
async def _annotate_node_list_by_type(
104108
self, node_list_by_type: Dict, raw: bool = False, fields: Optional[Union[str, List[str]]] = None
105109
) -> Iterable[tuple]:
106-
"""This is a helper method re-used in both annotate_curie_list and annotate_trapi methods
110+
"""
111+
This is a helper method re-used in both annotate_curie_list and annotate_trapi methods
107112
It returns a generator of tuples of (original_node_id, annotation_object) for each node_id,
108113
passed via node_list_by_type.
109114
"""
@@ -117,11 +122,12 @@ def _annotate_node_list_by_type(
117122

118123
# this is the list of query ids like 1017
119124
query_list = [parse_curie(_id, return_type=False, return_id=True) for _id in node_list_by_type[node_type]]
125+
120126
# query_id to original id mapping
121127
node_id_d = dict(zip(query_list, node_list))
122-
res_by_id = self.query_biothings(node_type, query_list, fields=fields)
128+
res_by_id = await self.query_biothings(node_type, query_list, fields=fields)
123129
if not raw:
124-
res_by_id = self.transform(res_by_id, node_type)
130+
res_by_id = await self.transform(res_by_id, node_type)
125131

126132
# map back to original node ids
127133
# NOTE: we don't want to use `for node_id in res_by_id:` here, since we will mofiify res_by_id in the loop
@@ -131,7 +137,7 @@ def _annotate_node_list_by_type(
131137
res_by_id[orig_node_id] = res_by_id.pop(node_id)
132138
yield (orig_node_id, res_by_id[orig_node_id])
133139

134-
def annotate_curie_list(
140+
async def annotate_curie_list(
135141
self,
136142
curie_list: Union[List[str], Iterable[str]],
137143
raw: bool = False,
@@ -154,14 +160,15 @@ def annotate_curie_list(
154160
else:
155161
logger.warning("Unsupported Curie prefix: %s. Skipped!", node_id)
156162

157-
for node_id, res in self._annotate_node_list_by_type(node_list_by_type, raw=raw, fields=fields):
163+
async for node_id, res in self._annotate_node_list_by_type(node_list_by_type, raw=raw, fields=fields):
158164
node_d[node_id] = res
165+
159166
if include_extra:
160167
# currently, we only need to append extra annotations for chem nodes
161168
self.append_extra_annotations(node_d, node_id_subset=node_list_by_type.get("chem", []))
162169
return node_d
163170

164-
def annotate_trapi(
171+
async def annotate_trapi(
165172
self,
166173
trapi_input: Dict,
167174
append: bool = False,
@@ -176,8 +183,8 @@ def annotate_trapi(
176183
try:
177184
node_d = get_dotfield_value("message.knowledge_graph.nodes", trapi_input)
178185
assert isinstance(node_d, dict)
179-
except (KeyError, ValueError, AssertionError):
180-
raise TRAPIInputError(trapi_input)
186+
except (KeyError, ValueError, AssertionError) as access_error:
187+
raise TRAPIInputError(trapi_input) from access_error
181188

182189
# if limit is set, we truncate the node_d to that size
183190
if limit:
@@ -203,7 +210,7 @@ def annotate_trapi(
203210
logger.warning("Unsupported Curie prefix: %s. Skipped!", node_id)
204211

205212
_node_d = {}
206-
for node_id, res in self._annotate_node_list_by_type(node_list_by_type, raw=raw, fields=fields):
213+
async for node_id, res in self._annotate_node_list_by_type(node_list_by_type, raw=raw, fields=fields):
207214
_node_d[node_id] = res
208215

209216
if include_extra:

biothings_annotator/annotator/transformer.py

+15-11
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@
55

66
import inspect
77
import logging
8-
import os
8+
from typing import Dict
99

10-
from .settings import SERVICE_PROVIDER_API_HOST
11-
from .utils import get_client
10+
from biothings_annotator.annotator.settings import SERVICE_PROVIDER_API_HOST
11+
from biothings_annotator.annotator.utils import get_client
1212

1313
logger = logging.getLogger(__name__)
1414

@@ -22,31 +22,33 @@ def append_prefix(id, prefix):
2222
atc_cache = {} # The global atc_cache will be load once when Transformer is initialized for the first time
2323

2424

25-
def load_atc_cache(api_host: str):
26-
"""Load WHO atc code-to-name mapping in a dictionary, which will be used in ResponseTransformer._transform_atc_classifications method"""
25+
async def load_atc_cache(api_host: str) -> Dict:
26+
"""
27+
Load WHO atc code-to-name mapping in a dictionary, which will be used in ResponseTransformer._transform_atc_classifications method
28+
"""
2729
global atc_cache
2830
if not atc_cache:
2931
logger.info("Loading WHO ATC code-to-name mapping...")
3032
atc_client = get_client("extra", api_host)
31-
atc_li = atc_client.query("_exists_:atc.code", fields="atc.code,atc.name", fetch_all=True)
33+
atc_li = await atc_client.query("_exists_:atc.code", fields="atc.code,atc.name", fetch_all=True)
3234
atc_cache = {}
33-
for atc in atc_li:
35+
async for atc in atc_li:
3436
atc_cache[atc["atc"]["code"]] = atc["atc"]["name"]
3537
logger.info(f"Loaded {len(atc_cache)} WHO ATC code-to-name mappings.")
3638
return atc_cache
3739

3840

3941
class ResponseTransformer:
40-
def __init__(self, res_by_id, node_type):
42+
def __init__(self, res_by_id: Dict, node_type: str, api_host: str, atc_cache: Dict):
4143
self.res_by_id = res_by_id
4244
self.node_type = node_type
43-
self.api_host = os.environ.get("SERVICE_PROVIDER_API_HOST", SERVICE_PROVIDER_API_HOST)
45+
self.api_host = api_host
4446

4547
self.data_cache = {} # used to cached required mapping data used for individual transformation
4648
# typically those data coming from other biothings APIs, we will do a batch
4749
# query to get them all, and cache them here for later use, to avoid slow
4850
# one by one queries.
49-
self.atc_cache = load_atc_cache(self.api_host)
51+
self.atc_cache = atc_cache
5052

5153
def _transform_chembl_drug_indications(self, doc):
5254
if self.node_type != "chem":
@@ -71,7 +73,9 @@ def _append_mesh_prefix(chembl):
7173
return doc
7274

7375
def _transform_atc_classifications(self, doc):
74-
"""add atc_classifications field to chem object based on chembl.atc_classifications and pharmgkb.xrefs.atc fields"""
76+
"""
77+
add atc_classifications field to chem object based on chembl.atc_classifications and pharmgkb.xrefs.atc fields
78+
"""
7579
if not self.atc_cache:
7680
return doc
7781

biothings_annotator/annotator/utils.py

+9-11
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,13 @@ def batched(iterable, n):
2121

2222
import biothings_client
2323

24-
from .exceptions import InvalidCurieError
25-
from .settings import ANNOTATOR_CLIENTS, BIOLINK_PREFIX_to_BioThings
24+
from biothings_annotator.annotator.exceptions import InvalidCurieError
25+
from biothings_annotator.annotator.settings import ANNOTATOR_CLIENTS, BIOLINK_PREFIX_to_BioThings
2626

2727
logger = logging.getLogger(__name__)
2828

2929

30-
def get_client(node_type: str, api_host: str) -> Union[biothings_client.BiothingClient, None]:
30+
def get_client(node_type: str, api_host: str) -> Union[biothings_client.AsyncBiothingClient, None]:
3131
"""
3232
Attempts to lazy load the biothings-client instance
3333
@@ -59,21 +59,20 @@ def get_client(node_type: str, api_host: str) -> Union[biothings_client.Biothing
5959
client_configuration = client_parameters.get("configuration")
6060
client_endpoint = client_parameters.get("endpoint")
6161
client_instance = client_parameters.get("instance")
62-
63-
if client_instance is not None and isinstance(client_instance, biothings_client.BiothingClient):
62+
if client_instance is not None and isinstance(client_instance, biothings_client.AsyncBiothingClient):
6463
client = client_instance
6564

6665
elif client_configuration is not None and isinstance(client_configuration, dict):
6766
try:
68-
client = biothings_client.get_client(**client_configuration)
67+
client = biothings_client.get_async_client(**client_configuration)
6968
except RuntimeError as runtime_error:
7069
logger.error("%s [%s]", runtime_error, client_configuration)
7170
client = None
7271

7372
elif client_endpoint is not None and isinstance(client_endpoint, str):
7473
client_url = f"{api_host}/{client_endpoint}"
7574
try:
76-
client = biothings_client.get_client(biothing_type=None, instance=True, url=client_url)
75+
client = biothings_client.get_async_client(biothing_type=None, instance=True, url=client_url)
7776
except RuntimeError as runtime_error:
7877
logger.error("%s [%s]", runtime_error, client_url)
7978
client = None
@@ -84,15 +83,15 @@ def get_client(node_type: str, api_host: str) -> Union[biothings_client.Biothing
8483
)
8584

8685
# cache the client
87-
if isinstance(client, biothings_client.BiothingClient):
86+
if isinstance(client, biothings_client.AsyncBiothingClient):
8887
ANNOTATOR_CLIENTS[node_type]["client"]["instance"] = client
8988

9089
return client
9190

9291

9392
def parse_curie(curie: str, return_type: bool = True, return_id: bool = True):
9493
"""
95-
return a both type and if (as a tuple) or either based on the input curie
94+
return both type and if (as a tuple) or either based on the input curie
9695
"""
9796
if ":" not in curie:
9897
raise InvalidCurieError(curie)
@@ -117,7 +116,7 @@ def group_by_subfield(collection: List[Dict], search_key: str) -> Dict:
117116
Takes a collection of dictionary entries with a specify subfield key "search_key" and
118117
extracts the subfield from each entry in the iterable into a dictionary.
119118
120-
It then bins entries into the dictionary so that identical keys have all results in one
119+
It the bins entries into the dictionary so that identical keys have all results in one
121120
aggregated list across the entire collection of dictionary entries
122121
123122
Example:
@@ -174,4 +173,3 @@ def get_dotfield_value(dotfield: str, d: Dict):
174173
else:
175174
first = fields[0]
176175
return get_dotfield_value(".".join(fields[1:]), d[first])
177-
return get_dotfield_value(".".join(fields[1:]), d[first])

biothings_annotator/application/cli/interface.py

-3
Original file line numberDiff line numberDiff line change
@@ -164,9 +164,6 @@ def run(self):
164164
> Build an HTTP server using the network parameters from the configuration
165165
and the sanic application
166166
"""
167-
if self.inspecting:
168-
self._inspector()
169-
return
170167

171168
self._precheck()
172169
try:

biothings_annotator/application/views/annotator.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ async def head(self, request: Request):
3030

3131
annotator = Annotator()
3232
try:
33-
annotated_node = annotator.annotate_curie(curie, fields=fields, raw=False, include_extra=False)
33+
annotated_node = await annotator.annotate_curie(curie, fields=fields, raw=False, include_extra=False)
3434

3535
if "NCBIGene:1017" not in annotated_node:
3636
return sanic.json(None, status=500)
@@ -71,7 +71,7 @@ async def get(self, _: Request):
7171

7272
annotator = Annotator()
7373
try:
74-
annotated_node = annotator.annotate_curie(curie, fields=fields, raw=False, include_extra=False)
74+
annotated_node = await annotator.annotate_curie(curie, fields=fields, raw=False, include_extra=False)
7575

7676
if "NCBIGene:1017" not in annotated_node:
7777
result = {"success": False, "error": "Service unavailable due to a failed data check!"}
@@ -157,7 +157,7 @@ async def get(self, request: Request, curie: str):
157157
annotator = Annotator()
158158

159159
try:
160-
annotated_node = annotator.annotate_curie(curie, fields=fields, raw=raw, include_extra=include_extra)
160+
annotated_node = await annotator.annotate_curie(curie, fields=fields, raw=raw, include_extra=include_extra)
161161
return sanic.json(annotated_node, headers=self.default_headers)
162162
except InvalidCurieError as curie_err:
163163
error_context = {
@@ -276,7 +276,7 @@ async def post(self, request: Request):
276276
return curie_error_response
277277

278278
try:
279-
annotated_node = annotator.annotate_curie_list(
279+
annotated_node = await annotator.annotate_curie_list(
280280
curie_list=curie_list, fields=fields, raw=raw, include_extra=include_extra
281281
)
282282
return sanic.json(annotated_node, headers=self.default_headers)
@@ -408,7 +408,7 @@ async def post(self, request: Request):
408408
annotator = Annotator()
409409
trapi_body = request.json
410410
try:
411-
annotated_node = annotator.annotate_trapi(
411+
annotated_node = await annotator.annotate_trapi(
412412
trapi_body, fields=fields, raw=raw, append=append, limit=limit, include_extra=include_extra
413413
)
414414
return sanic.json(annotated_node, headers=self.default_headers)

0 commit comments

Comments
 (0)