update to torch 1.11, python 3.10 (#423)
Summary:
Now that torch 1.11 is released, we should update to it. This also adds Python 3.10 support to the unit tests, now that torch supports it.

Pull Request resolved: #423

Test Plan:
CI
```
torchx/runtime/container/build.sh
```

Reviewed By: PaliC

Differential Revision: D34903945

Pulled By: d4l3k

fbshipit-source-id: ba6be59f2aabac0accd94a9406cae31a43951f1c
d4l3k authored and facebook-github-bot committed Mar 15, 2022
1 parent 4cd94d8 commit 354d26e
Showing 6 changed files with 18 additions and 10 deletions.
.github/workflows/python-unittests.yaml (2 changes: 1 addition & 1 deletion)

```diff
@@ -10,7 +10,7 @@ jobs:
   unittest:
     strategy:
       matrix:
-        python-version: [3.7, 3.8, 3.9]
+        python-version: [3.7, 3.8, 3.9, '3.10']
         platform: [ubuntu-18.04]
         include:
           - python-version: 3.9
```
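
Quoting '3.10' in the matrix matters: YAML reads an unquoted 3.10 as the float 3.1, which would hand the wrong version string to the setup step. A quick sketch with PyYAML (not something this change uses, just a convenient way to show the parse):

```python
import yaml  # PyYAML, assumed available only for this illustration

# Unquoted, YAML parses 3.10 as the float 3.1.
print(yaml.safe_load("python-version: [3.7, 3.8, 3.9, 3.10]"))
# {'python-version': [3.7, 3.8, 3.9, 3.1]}

# Quoted, it stays the string '3.10' that the workflow needs.
print(yaml.safe_load("python-version: [3.7, 3.8, 3.9, '3.10']"))
# {'python-version': [3.7, 3.8, 3.9, '3.10']}
```
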
dev-requirements.txt (16 changes: 9 additions & 7 deletions)

```diff
@@ -12,12 +12,14 @@ kfp==1.8.9
 moto==3.0.2
 pyre-extensions==0.0.21
 pytest
-pytorch-lightning==1.5.6
-ray[default]==1.9.2
-torch-model-archiver==0.4.2
-torch==1.10.0
-torchserve==0.4.2
-torchtext==0.11.0
-torchvision==0.11.1
+pytorch-lightning==1.5.10
+torch-model-archiver>=0.4.2
+torch>=1.10.0
+torchserve>=0.4.2
+torchtext>=0.11.0
+torchvision>=0.11.1
 ts==0.5.1
 usort==0.6.4
+
+# Ray doesn't support Python 3.10
+ray[default]==1.11.0; python_version < '3.10'
```
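
The trailing `; python_version < '3.10'` is a PEP 508 environment marker, so pip simply skips ray on 3.10 interpreters instead of failing to resolve it. A small sketch using the packaging library (an assumption here, not something this diff touches) shows how such a marker evaluates:

```python
from packaging.markers import Marker
from packaging.requirements import Requirement

req = Requirement("ray[default]==1.11.0; python_version < '3.10'")

# Evaluated against the running interpreter: True on 3.9, False on 3.10,
# so pip installs or skips the requirement accordingly.
print(req.marker.evaluate())

# Markers can also be checked against an explicit environment.
print(Marker("python_version < '3.10'").evaluate({"python_version": "3.9"}))   # True
print(Marker("python_version < '3.10'").evaluate({"python_version": "3.10"}))  # False
```
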
torchx/runtime/container/Dockerfile (2 changes: 1 addition & 1 deletion)

```diff
@@ -1,4 +1,4 @@
-FROM pytorch/pytorch:1.10.0-cuda11.3-cudnn8-runtime
+FROM pytorch/pytorch:1.11.0-cuda11.3-cudnn8-runtime
 
 WORKDIR /app
 
```
torchx/schedulers/ray/ray_driver.py (2 changes: 2 additions & 0 deletions)

```diff
@@ -137,6 +137,7 @@ def create_command_actors(
 
 def main() -> None:  # pragma: no cover
     actors: List[RayActor] = load_actor_json("actors.json")
+    # pyre-fixme[16]: Module `worker` has no attribute `init`.
     ray.init(address="auto", namespace="torchx-ray")
     pgs: List[PlacementGroup] = create_placement_groups(actors)
     command_actors: List[CommandActor] = create_command_actors(actors, pgs)
@@ -148,6 +149,7 @@ def main() -> None:  # pragma: no cover
 
     # Await return result of remote ray function
     while len(active_workers) > 0:
+        # pyre-fixme[16]: Module `worker` has no attribute `wait`.
         completed_workers, active_workers = ray.wait(active_workers)
         # If a failure occurs the ObjectRef will be marked as completed.
         # Calling ray.get will expose the failure as a RayActorError.
```
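
The new comments here (and in the test file below) are Pyre inline suppressions: `# pyre-fixme[16]` on its own line silences error code 16 (undefined attribute) for the line that follows, presumably because Pyre cannot see the `init`/`wait`/`shutdown` functions that ray re-exports from its `worker` module after the version bump. A generic illustration of the mechanism, not code from this repository:

```python
class Settings:
    """Toy class with a single declared attribute, used to trigger error 16."""

    host: str = "localhost"


def describe(settings: Settings) -> str:
    # Without the suppression Pyre reports something like:
    #   Undefined attribute [16]: `Settings` has no attribute `port`.
    # pyre-fixme[16]: `Settings` has no attribute `port`.
    return f"{settings.host}:{settings.port}"
```

Unlike a bare `# type: ignore`, the suppression is scoped to a single error code, so unrelated errors on the same line still surface.
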
torchx/schedulers/test/ray_scheduler_test.py (4 changes: 4 additions & 0 deletions)

```diff
@@ -258,12 +258,14 @@ class RayClusterSetup:
     def __new__(cls):  # pyre-ignore[3]
         if cls._instance is None:
             cls._instance = super(RayClusterSetup, cls).__new__(cls)
+            # pyre-fixme[16]: Module `worker` has no attribute `shutdown`.
             ray.shutdown()
             start_status: int = os.system("ray start --head")
             if start_status != 0:
                 raise AssertionError(
                     "ray start --head command has failed. Cannot proceed with running tests"
                 )
+            # pyre-fixme[16]: Module `worker` has no attribute `init`.
             ray.init(address="auto", ignore_reinit_error=True)
             cls.reference_count: int = 2
         return cls._instance
@@ -274,6 +276,7 @@ def decrement_reference(cls) -> None:
             cls.teardown_ray_cluster()
 
     def teardown_ray_cluster(cls) -> None:
+        # pyre-fixme[16]: Module `worker` has no attribute `shutdown`.
         ray.shutdown()
 
 class RayDriverTest(TestCase):
@@ -306,6 +309,7 @@ class RayIntegrationTest(TestCase):
     def test_ray_cluster(self) -> None:
         ray_cluster_setup = RayClusterSetup()
         ray_scheduler = self.setup_ray_cluster()
+        # pyre-fixme[16]: Module `worker` has no attribute `is_initialized`.
        assert ray.is_initialized() is True
 
         job_id = self.schedule_ray_job(ray_scheduler)
```
torchx/specs/api.py (2 changes: 1 addition & 1 deletion)

```diff
@@ -494,7 +494,7 @@ def get_type_name(tp: Type[CfgVal]) -> str:
 
     Note: we use this method to print out generic typing like List[str].
     """
-    if hasattr(tp, "__name__"):
+    if tp.__module__ != "typing" and hasattr(tp, "__name__"):
         return tp.__name__
     else:
         return str(tp)
```
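
The extra `tp.__module__ != "typing"` guard targets Python 3.10, where typing generics such as `List[str]` started exposing a `__name__` attribute; without the guard the helper would return the bare name instead of the full generic string. A minimal sketch of the behavior being preserved (the 3.9-vs-3.10 `hasattr` difference is the assumption here):

```python
from typing import List


def get_type_name(tp) -> str:
    """Simplified mirror of the patched helper in torchx/specs/api.py."""
    if tp.__module__ != "typing" and hasattr(tp, "__name__"):
        return tp.__name__
    return str(tp)


print(get_type_name(int))        # "int"
print(get_type_name(List[str]))  # "typing.List[str]" on 3.9 and 3.10 alike
```
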
