Skip to content

Commit 439104d

Browse files
committed
Merge tag 'itrb-deployment-20241017' into production
2 parents 1c75259 + 2de2fe4 commit 439104d

File tree

80 files changed

+11055
-10083
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

80 files changed

+11055
-10083
lines changed

.github/workflows/pytest.yml

+14-15
Original file line number | Diff line number | Diff line change
@@ -35,17 +35,24 @@ jobs:
3535
steps:
3636
- name: Checkout repository
3737
uses: actions/checkout@v2
38+
3839

3940
- name: Set up config files
4041
run: |
4142
scp [email protected]:config_secrets.json /mnt/config/config_secrets.json
42-
echo "http://localhost:5008/api/rtxkg2/v1.4" > /mnt/config/kg2_url_override.txt
4343
44-
# - name: Append code dir to GITHUB_PATH # Causing RTXConfiguration import error
45-
# run: echo $GITHUB_PATH
4644
47-
# - name: Download databases as needed
48-
# run: python3 $HOME/RTX/code/ARAX/ARAXQuery/ARAX_database_manager.py --mnt --skip-if-exists --remove_unused
45+
- name: Download databases into CICD instance
46+
continue-on-error: true
47+
run: |
48+
cd /home/ubuntu/RTX
49+
git checkout ${GITHUB_REF#refs/heads/}
50+
git pull
51+
/home/ubuntu/venv3.9/bin/pip3 install -r requirements.txt
52+
/home/ubuntu/venv3.9/bin/python3 code/ARAX/ARAXQuery/ARAX_database_manager.py --mnt --skip-if-exists --remove_unused
53+
git checkout master
54+
55+
4956
5057
- name: Stop any running containers
5158
continue-on-error: true
@@ -63,25 +70,17 @@ jobs:
6370
run: sudo docker build --no-cache=true --rm -t rtx:test DockerBuild/ -f DockerBuild/Merged-Dockerfile
6471

6572
- name: Run docker container
66-
run: sudo docker run -d -it --name rtx_test -v /mnt/data/orangeboard/databases:/mnt/data/orangeboard/databases -v /mnt/config/kg2_url_override.txt:/mnt/data/orangeboard/production/RTX/code/kg2_url_override.txt -v /mnt/config/config_secrets.json:/mnt/data/orangeboard/production/RTX/code/config_secrets.json -v /mnt/config/config_secrets.json:/mnt/data/orangeboard/kg2/RTX/code/config_secrets.json rtx:test
67-
68-
# - name: Add config.domain
69-
# run: |
70-
# sudo docker exec rtx_test bash -c "sudo -u rt bash -c 'cd /mnt/data/orangeboard/production/RTX/code && echo \'RTXTeam/RTX Github actions test suite\' > config.domain'"
71-
# sudo docker exec rtx_test bash -c "sudo -u rt bash -c 'cd /mnt/data/orangeboard/kg2/RTX/code && echo \'RTXTeam/RTX Github actions test suite\' > config.domain'"
73+
run: sudo docker run -d -it --name rtx_test -v /mnt/data/orangeboard/databases:/mnt/data/orangeboard/databases -v /mnt/config/config_secrets.json:/mnt/data/orangeboard/production/RTX/code/config_secrets.json rtx:test
7274

7375
- name: Create symlinks
74-
run: |
75-
sudo docker exec rtx_test bash -c "sudo -u rt bash -c 'cd /mnt/data/orangeboard/production/RTX && python3 code/ARAX/ARAXQuery/ARAX_database_manager.py'"
76-
sudo docker exec rtx_test bash -c "sudo -u rt bash -c 'cd /mnt/data/orangeboard/kg2/RTX && python3 code/ARAX/ARAXQuery/ARAX_database_manager.py'"
76+
run: sudo docker exec rtx_test bash -c "sudo -u rt bash -c 'cd /mnt/data/orangeboard/production/RTX && python3 code/ARAX/ARAXQuery/ARAX_database_manager.py'"
7777

7878
- name: Build KP info cache
7979
run: sudo docker exec rtx_test bash -c "cd /mnt/data/orangeboard/production/RTX/code/ARAX/ARAXQuery/Expand && python3 kp_info_cacher.py"
8080

8181
- name: Start services
8282
run: |
8383
sudo docker exec rtx_test service apache2 start
84-
sudo docker exec rtx_test service RTX_OpenAPI_kg2 start
8584
sudo docker exec rtx_test service RTX_OpenAPI_production start
8685
sudo docker exec rtx_test service RTX_Complete start
8786

.gitignore

+1
Original file line number | Diff line number | Diff line change
@@ -51,3 +51,4 @@ code/ARAX/ARAXQuery/Infer/data/xCRG_data/*.pt
5151
code/ARAX/ARAXQuery/Infer/data/xCRG_data/*.npz
5252

5353
code/UI/OpenAPI/python-flask-server/openapi_server/openapi/openapi.json
54+
code/UI/OpenAPI/specifications/export/KG2/*/openapi.json

ISSUE_TEMPLATES/kg2rollout.md

+45-66
Large diffs are not rendered by default.

code/ARAX/ARAXQuery/ARAX_connect.py

+74-26
Original file line number | Diff line number | Diff line change
@@ -12,15 +12,16 @@ def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs)
1212
import copy
1313

1414
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
15-
from Path_Finder.converter.paths_to_response_converter_factory import paths_to_response_converter_factory
15+
from Path_Finder.converter.EdgeExtractorFromPloverDB import EdgeExtractorFromPloverDB
16+
from Path_Finder.converter.SuperNodeConverter import SuperNodeConverter
1617
from Path_Finder.converter.Names import Names
1718
from Path_Finder.BidirectionalPathFinder import BidirectionalPathFinder
18-
from Path_Finder.repo.NGDSortedNeighborsRepo import NGDSortedNeighborsRepo
19-
from Path_Finder.repo.PloverDBRepo import PloverDBRepo
2019

2120
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../../UI/OpenAPI/python-flask-server/")
2221
from openapi_server.models.q_edge import QEdge
22+
from openapi_server.models.q_node import QNode
2323
from openapi_server.models.knowledge_graph import KnowledgeGraph
24+
2425
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../NodeSynonymizer/")
2526
from node_synonymizer import NodeSynonymizer
2627

@@ -51,12 +52,11 @@ def __init__(self):
5152
"type": "list",
5253
"description": "List with just two qnode keys to connect. example: [n1, n2]"
5354
}
54-
self.result_as_info = {
55+
self.node_category_constraint_info = {
5556
"is_required": False,
56-
"examples": ['betweenness_centrality', 'all_in_one', 'one_by_one'],
57+
"examples": ['biolink:Disease', 'biolink:Gene', 'biolink:ChemicalEntity'],
5758
"type": "string",
58-
"description": "It determines how to receive the results. For instance, one_by_one means that it will "
59-
"return each path in one subgraph. The default value is betweenness_centrality"
59+
"description": "This constraint will display paths that only pass through the user-specified category."
6060
}
6161

6262
# command descriptions
@@ -78,7 +78,7 @@ def __init__(self):
7878
"parameters": {
7979
"max_path_length": self.max_path_length_info,
8080
"qnode_keys": self.qnode_keys_info,
81-
"result_as": self.result_as_info
81+
"node_category_constraint": self.node_category_constraint_info
8282
}
8383
}
8484
}
@@ -148,6 +148,8 @@ def check_params(self, allowable_parameters):
148148
continue
149149
elif any([type(x) == int for x in allowable_parameters[key]]):
150150
continue
151+
elif any([type(x) == str for x in allowable_parameters[key]]):
152+
continue
151153
else: # otherwise, it's really not an allowable parameter
152154
self.response.warning(
153155
f"Supplied value {item} is not permitted. In action {allowable_parameters['action']}, allowable values to {key} are: {list(allowable_parameters[key])}")
@@ -217,15 +219,15 @@ def __connect_nodes(self, describe=False):
217219
if message and parameters and hasattr(message, 'query_graph') and hasattr(message.query_graph, 'nodes'):
218220
allowable_parameters = {'action': {'connect_nodes'},
219221
'max_path_length': {int()},
220-
'result_as': {'betweenness_centrality', 'all_in_one', 'one_by_one'},
222+
'node_category_constraint': {str()},
221223
'qnode_keys': set(self.message.query_graph.nodes.keys())
222224
}
223225
else:
224226
allowable_parameters = {'action': {'connect_nodes'},
225227
'max_path_length': {
226-
'a maximum path length to use to connect qnodes. Defaults to 2.'},
227-
'result_as': {
228-
'How to show results?'},
228+
'A maximum path length to use to connect qnodes. Defaults to 2.'},
229+
'node_category_constraint': {
230+
'All paths must include at least one node from this category constraint.'},
229231
'qnode_keys': {'A list with just two query keys to connect'}
230232
}
231233

@@ -253,8 +255,8 @@ def __connect_nodes(self, describe=False):
253255

254256
if 'max_path_length' not in self.parameters:
255257
self.parameters['max_path_length'] = 2
256-
if 'result_as' not in self.parameters:
257-
self.parameters['result_as'] = 'betweenness_centrality'
258+
if 'node_category_constraint' not in self.parameters:
259+
self.parameters['node_category_constraint'] = ''
258260
# convert path length to int if it isn't already
259261
if type(self.parameters['max_path_length']) != int:
260262
self.parameters['max_path_length'] = int(self.parameters['max_path_length'])
@@ -281,43 +283,89 @@ def __connect_nodes(self, describe=False):
281283
self.response.error(f"Need to have two nodes to find paths between them. Number of nodes: {len(nodes)}")
282284

283285
path_finder = BidirectionalPathFinder(
284-
NGDSortedNeighborsRepo(
285-
PloverDBRepo(plover_url=RTXConfiguration().plover_url)
286-
)
286+
"NGDSortedNeighborsRepo",
287+
self.response
287288
)
288289
qnode_1_id = self.parameters['qnode_keys'][0]
289290
qnode_2_id = self.parameters['qnode_keys'][1]
290291
synonymizer = NodeSynonymizer()
291-
node_1_id = synonymizer.get_canonical_curies(curies=nodes[qnode_1_id].ids[0])[nodes[qnode_1_id].ids[0]]['preferred_curie']
292-
node_2_id = synonymizer.get_canonical_curies(curies=nodes[qnode_2_id].ids[0])[nodes[qnode_2_id].ids[0]]['preferred_curie']
292+
try:
293+
node_1_id = synonymizer.get_canonical_curies(curies=nodes[qnode_1_id].ids[0])[nodes[qnode_1_id].ids[0]][
294+
'preferred_curie']
295+
except Exception as e:
296+
self.response.error(f"PathFinder could not get canonical CURIE for the node: {qnode_1_id}."
297+
f" You need to provide id (CURIE) or name for this node"
298+
f" Error message is: {e}")
299+
return self.response
300+
301+
try:
302+
node_2_id = synonymizer.get_canonical_curies(curies=nodes[qnode_2_id].ids[0])[nodes[qnode_2_id].ids[0]][
303+
'preferred_curie']
304+
except Exception as e:
305+
self.response.error(f"PathFinder could not get canonical CURIE for the node: {qnode_2_id}"
306+
f" You need to provide id (CURIE) or name for this node."
307+
f" Error message is: {e}")
308+
return self.response
293309

294310
paths = path_finder.find_all_paths(node_1_id, node_2_id, hops_numbers=self.parameters['max_path_length'])
295311

312+
self.response.debug(f"PathFinder found {len(paths)} paths")
313+
296314
if len(paths) == 0:
297315
self.response.warning(f"Could not connect the nodes {qnode_1_id} and {qnode_2_id} "
298316
f"with a max path length of {self.parameters['max_path_length']}.")
299317
return self.response
300318

301-
q_edge_name = 'q_edge_path_finder'
302-
self.response.envelope.message.query_graph.edges[q_edge_name] = QEdge(
319+
qnode_mid_id = "qnode_mid_id"
320+
self.response.envelope.message.query_graph.nodes[qnode_mid_id] = QNode(
321+
ids=[],
322+
categories=None,
323+
is_set=False,
324+
set_interpretation='BATCH',
325+
set_id=None,
326+
constraints=[],
327+
option_group_id=None
328+
)
329+
330+
q_edge_src_dest = 'q_edge_src_dest'
331+
self.response.envelope.message.query_graph.edges[q_edge_src_dest] = QEdge(
303332
object=qnode_1_id,
304333
subject=qnode_2_id
305334
)
335+
q_edge_src_mid = 'q_edge_src_mid'
336+
self.response.envelope.message.query_graph.edges[q_edge_src_mid] = QEdge(
337+
object=qnode_1_id,
338+
subject=qnode_mid_id
339+
)
340+
q_edge_mid_dest = 'q_edge_mid_dest'
341+
self.response.envelope.message.query_graph.edges[q_edge_mid_dest] = QEdge(
342+
object=qnode_mid_id,
343+
subject=qnode_2_id
344+
)
306345

307346
names = Names(
308-
q_edge_name=q_edge_name,
347+
q_src_dest_edge_name=q_edge_src_dest,
348+
q_src_mid_edge_name=q_edge_src_mid,
349+
q_mid_dest_edge_name=q_edge_mid_dest,
309350
result_name="result",
310351
auxiliary_graph_name="aux",
311-
kg_edge_name="kg_edge"
352+
kg_src_dest_edge_name="kg_src_dest_edge",
353+
kg_src_mid_edge_name="kg_src_mid_edge",
354+
kg_mid_dest_edge_name="kg_mid_dest_edge",
355+
)
356+
edge_extractor = EdgeExtractorFromPloverDB(
357+
RTXConfiguration().plover_url
312358
)
313-
paths_to_response_converter_factory(
314-
self.parameters['result_as'],
359+
SuperNodeConverter(
315360
paths,
316361
node_1_id,
317362
node_2_id,
318363
qnode_1_id,
319364
qnode_2_id,
320-
names
365+
qnode_mid_id,
366+
names,
367+
edge_extractor,
368+
self.parameters['node_category_constraint']
321369
).convert(self.response)
322370

323371
if mode != "RTXKG2" and not hasattr(self.response, "original_query_graph"):

code/ARAX/ARAXQuery/ARAX_database_manager.py

+8
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,7 @@ def __init__(self, allow_downloads=False):
7979
self.local_paths = {
8080
'cohd_database': f"{cohd_filepath}{os.path.sep}{self.RTXConfig.cohd_database_path.split('/')[-1]}",
8181
'curie_to_pmids': f"{ngd_filepath}{os.path.sep}{self.RTXConfig.curie_to_pmids_path.split('/')[-1]}",
82+
'curie_ngd': f"{ngd_filepath}{os.path.sep}{self.RTXConfig.curie_ngd_path.split('/')[-1]}",
8283
'node_synonymizer': f"{synonymizer_filepath}{os.path.sep}{self.RTXConfig.node_synonymizer_path.split('/')[-1]}",
8384
'kg2c_sqlite': f"{kg2c_filepath}{os.path.sep}{self.RTXConfig.kg2c_sqlite_path.split('/')[-1]}",
8485
'kg2c_meta_kg': f"{kg2c_meta_kg_filepath}{os.path.sep}{self.RTXConfig.kg2c_meta_kg_path.split('/')[-1]}",
@@ -95,6 +96,7 @@ def __init__(self, allow_downloads=False):
9596
self.database_subpaths = {
9697
'cohd_database': self.get_database_subpath(self.RTXConfig.cohd_database_path),
9798
'curie_to_pmids': self.get_database_subpath(self.RTXConfig.curie_to_pmids_path),
99+
'curie_ngd': self.get_database_subpath(self.RTXConfig.curie_ngd_path),
98100
'node_synonymizer': self.get_database_subpath(self.RTXConfig.node_synonymizer_path),
99101
'kg2c_sqlite': self.get_database_subpath(self.RTXConfig.kg2c_sqlite_path),
100102
'kg2c_meta_kg': self.get_database_subpath(self.RTXConfig.kg2c_meta_kg_path),
@@ -110,6 +112,7 @@ def __init__(self, allow_downloads=False):
110112
self.remote_locations = {
111113
'cohd_database': self.get_remote_location('cohd_database'),
112114
'curie_to_pmids': self.get_remote_location('curie_to_pmids'),
115+
'curie_ngd': self.get_remote_location('curie_ngd'),
113116
'node_synonymizer': self.get_remote_location('node_synonymizer'),
114117
'kg2c_sqlite': self.get_remote_location('kg2c_sqlite'),
115118
'kg2c_meta_kg': self.get_remote_location('kg2c_meta_kg'),
@@ -125,6 +128,7 @@ def __init__(self, allow_downloads=False):
125128
self.docker_central_paths = {
126129
'cohd_database': self.get_docker_path('cohd_database'),
127130
'curie_to_pmids': self.get_docker_path('curie_to_pmids'),
131+
'curie_ngd': self.get_docker_path('curie_ngd'),
128132
'node_synonymizer': self.get_docker_path('node_synonymizer'),
129133
'kg2c_sqlite': self.get_docker_path('kg2c_sqlite'),
130134
'kg2c_meta_kg': self.get_docker_path('kg2c_meta_kg'),
@@ -146,6 +150,10 @@ def __init__(self, allow_downloads=False):
146150
'path': self.local_paths['curie_to_pmids'],
147151
'version': self.RTXConfig.curie_to_pmids_version
148152
},
153+
'curie_ngd': {
154+
'path': self.local_paths['curie_ngd'],
155+
'version': self.RTXConfig.curie_ngd_version
156+
},
149157
'node_synonymizer': {
150158
'path': self.local_paths['node_synonymizer'],
151159
'version': self.RTXConfig.node_synonymizer_version

0 commit comments

Comments
 (0)