Implement minimal local and remote mode

liam-sbhoo · liam-sbhoo · commit 9200e09d612f · 2023-08-10T14:23:42.000+02:00
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "tabpfn_client/tabpfn_common_utils"]
+	path = tabpfn_client/tabpfn_common_utils
+	url = https://github.com/liam-sbhoo/tabpfn_common_utils.git
diff --git a/tabpfn_client/server_spec.yaml b/tabpfn_client/server_spec.yaml
@@ -1,27 +1,28 @@
-host: "192.52.42.37"
+#host: "192.52.42.37"
+host: "0.0.0.0"
 port: "80"
 endpoints:
-  - root:
-      path: "/"
-      methods: ["GET"]
-      description: "Root endpoint"
+  root:
+    path: "/"
+    methods: ["GET"]
+    description: "Root endpoint"
 
-  - login:
-      path: "/auth/login/"
-      methods: ["POST"]
-      description: "Login endpoint"
+  login:
+    path: "/auth/login/"
+    methods: ["POST"]
+    description: "Login endpoint"
 
-  - protected_root:
-      path: "/protected/"
-      methods: ["GET"]
-      description: "Protected root endpoint"
+  protected_root:
+    path: "/protected/"
+    methods: ["GET"]
+    description: "Protected root endpoint"
 
-  - upload_train_set:
-      path: "/upload/train_set/"
-      methods: ["POST"]
-      description: "Upload train set endpoint"
+  upload_train_set:
+    path: "/upload/train_set/"
+    methods: ["POST"]
+    description: "Upload train set endpoint"
 
-  - predict:
-      path: "/predict/"
-      methods: ["POST"]
-      description: "Predict endpoint"
+  predict:
+    path: "/predict/"
+    methods: ["POST"]
+    description: "Predict endpoint"
diff --git a/tabpfn_client/tabpfn_classifier.py b/tabpfn_client/tabpfn_classifier.py
@@ -3,7 +3,7 @@
 from omegaconf import OmegaConf
 
 from tabpfn import TabPFNClassifier as TabPFNClassifierLocal
-from tabpfn_client.tabpfn_classifier_interface import TabPFNClassifierInterface
+from tabpfn_client.tabpfn_classifier_interface import AbstractTabPFNClassifier
 from tabpfn_client.tabpfn_service_client import TabPFNServiceClient
 
 SERVER_SPEC_FILE = pathlib.Path(__file__).parent.resolve() / "server_spec.yaml"
@@ -46,9 +46,7 @@ def remove_saved_access_token():
     pass
 
 
-class TabPFNClassifier(TabPFNClassifierInterface):
-    # TODO: ask Sam/Noah if we could create an interface of TabPFNClassifier instead
-
+class TabPFNClassifier(AbstractTabPFNClassifier):
     def __init__(self, device='cpu', base_path=pathlib.Path(__file__).parent.parent.resolve(), model_string='',
                  N_ensemble_configurations=3, no_preprocess_mode=False, multiclass_decoder='permutation',
                  feature_shift_decoder=True, only_inference=True, seed=0, no_grad=True, batch_size_inference=32):
diff --git a/tabpfn_client/tabpfn_classifier_interface.py b/tabpfn_client/tabpfn_classifier_interface.py
@@ -1,18 +1,14 @@
 from abc import ABC, abstractmethod
 
 
-class TabPFNClassifierInterface(ABC):
+class AbstractTabPFNClassifier(ABC):
 
     @abstractmethod
     def remove_models_from_memory(self):
         pass
 
     @abstractmethod
-    def load_result_minimal(self, path, i, e):
-        pass
-
-    @abstractmethod
-    def fit(self, X, y):
+    def fit(self, X, y, overwrite_warning=False):
         pass
 
     @abstractmethod
diff --git a/tabpfn_client/tabpfn_service_client.py b/tabpfn_client/tabpfn_service_client.py
@@ -1,12 +1,15 @@
 import os
 import httpx
+from typing import Any
+import logging
 
-from tabpfn_client.tabpfn_classifier_interface import TabPFNClassifierInterface
+from tabpfn_client.tabpfn_classifier_interface import AbstractTabPFNClassifier
+from tabpfn_client.tabpfn_common_utils import utils as common_utils
 
 SERVER_ENDPOINTS_YAML = os.path.join(os.path.dirname(__file__), "server_endpoints.yaml")
 
 
-class TabPFNServiceClient(TabPFNClassifierInterface):
+class TabPFNServiceClient(AbstractTabPFNClassifier):
     def __init__(self, server_spec: dict, access_token: str):
         self.host = server_spec["host"]
         self.port = server_spec["port"]
@@ -16,22 +19,66 @@ def __init__(self, server_spec: dict, access_token: str):
         self.access_token = access_token
         self.server_endpoints = server_spec["endpoints"]
 
+        self.last_per_user_train_set_id = None
+
     def remove_models_from_memory(self):
         raise NotImplementedError
 
-    def load_result_minimal(self, path, i, e):
-        raise NotImplementedError
+    def fit(self, X: Any, y: Any, overwrite_warning=False):
+        """Upload the train set (X, y) to the server and remember the returned train set id."""
+        # overwrite_warning is accepted only to match AbstractTabPFNClassifier.fit(); it is unused here.
+        X = common_utils.serialize_to_csv_formatted_bytes(X)
+        y = common_utils.serialize_to_csv_formatted_bytes(y)
 
-    def fit(self, X, y):
-        pass
+        response = self.client.post(
+            url=self.server_endpoints["upload_train_set"]["path"],
+            headers={"Authorization": f"Bearer {self.access_token}"},
+            files=common_utils.to_httpx_post_file_format([
+                ("x_file", X),
+                ("y_file", y)
+            ])
+        )
 
-    def predict(self, X):
-        raise NotImplementedError
+        if response.status_code != 200:
+            logging.error(f"Fail to call upload_train_set(), response status: {response.status_code}")
+            # An error body is not guaranteed to be JSON, so report the raw text instead of json().
+            logging.error(f"Fail to call fit(), server response: {response.text}")
+            raise RuntimeError(f"Fail to call fit(), server response: {response.text}")
+        self.last_per_user_train_set_id = response.json()["per_user_train_set_id"]
+        return self
+
+    def predict(self, X, return_winning_class=False, normalize_with_test=False):
+        """Send X to the predict endpoint for the last uploaded train set; return the JSON reply."""
+        # TODO: handle return_winning_class and normalize_with_test
+        # fit() stores the train set id that is sent along with the predict request
+        if self.last_per_user_train_set_id is None:
+            raise RuntimeError("You must call fit() before calling predict()")
+
+        X = common_utils.serialize_to_csv_formatted_bytes(X)
+
+        response = self.client.post(
+            url=self.server_endpoints["predict"]["path"],
+            headers={"Authorization": f"Bearer {self.access_token}"},
+            params={"per_user_train_set_id": self.last_per_user_train_set_id},
+            files=common_utils.to_httpx_post_file_format([
+                ("x_file", X)
+            ])
+        )
+
+        if response.status_code != 200:
+            logging.error(f"Fail to call predict(), response status: {response.status_code}")
+            # An error body is not guaranteed to be JSON, so report the raw text instead of json().
+            raise RuntimeError(f"Fail to call predict(), server response: {response.text}")
+
+        return response.json()
 
     def predict_proba(self, X, return_winning_probability=False, normalize_with_test=False):
         pass
 
     def try_root(self):
-        response = self.client.get("/")
+        response = self.client.get(
+            self.server_endpoints["protected_root"]["path"],
+            headers={"Authorization": f"Bearer {self.access_token}"},
+        )
         print("response:", response.json())
         return response
diff --git a/tabpfn_client/tests/integration/test_tabpfn_classifier.py b/tabpfn_client/tests/integration/test_tabpfn_classifier.py
@@ -0,0 +1,17 @@
+import unittest
+
+
+class TestTabpfnClassifier(unittest.TestCase):
+    def test_use_local_tabpfn_classifier(self):
+        self.skipTest("placeholder: test not implemented yet")
+
+    def test_use_remote_tabpfn_classifier(self):
+        self.skipTest("placeholder: test not implemented yet")
+
+
+class TestInitTabPFNBuilder(unittest.TestCase):
+    def test_save_access_token_upon_successful_login(self):
+        self.skipTest("placeholder: test not implemented yet")
+
+    def test_remove_saved_access_token(self):
+        self.skipTest("placeholder: test not implemented yet")
diff --git a/tabpfn_client/tests/integration/test_tabpfn_service_client.py b/tabpfn_client/tests/integration/test_tabpfn_service_client.py
@@ -0,0 +1,24 @@
+import unittest
+
+
+class TestTabpfnServiceClient(unittest.TestCase):
+    def test_invalid_auth_token(self):
+        self.skipTest("placeholder: test not implemented yet")
+
+    def test_predict_with_valid_train_set_and_test_set(self):
+        self.skipTest("placeholder: test not implemented yet")
+
+    def test_predict_with_conflicting_test_set(self):
+        self.skipTest("placeholder: test not implemented yet")
+
+    def test_call_predict_without_calling_fit_before(self):
+        self.skipTest("placeholder: test not implemented yet")
+
+    def test_call_predict_proba_without_calling_fit_before(self):
+        self.skipTest("placeholder: test not implemented yet")
+
+    def test_call_predict_after_calling_fit_twice(self):
+        self.skipTest("placeholder: test not implemented yet")
+
+    def test_call_predict_proba_after_calling_fit_twice(self):
+        self.skipTest("placeholder: test not implemented yet")

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+[submodule "tabpfn_client/tabpfn_common_utils"]`
	`2`	`+ path = tabpfn_client/tabpfn_common_utils`
	`3`	`+ url = https://github.com/liam-sbhoo/tabpfn_common_utils.git`