You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
/kaggle/working/tabpfn_extensions/post_hoc_ensembles/abstract_validation_utils.py in get_oof_per_estimator(self, X, y, return_loss_per_estimator, impute_dropped_instances, _extra_processing)
370 holdout_index_hit_counts = current_repeat
371
--> 372 self._fill_predictions_in_place(
373 model_i=model_i,
374 base_model=base_model,
/kaggle/working/tabpfn_extensions/post_hoc_ensembles/abstract_validation_utils.py in _fill_predictions_in_place(self, model_i, base_model, oof_proba_list, X, y, train_index, test_index, loss_per_estimator, holdout_index_hits, _extra_processing, split_i)
125
126 # Default base models case
--> 127 base_model.fit(fold_X_train, fold_y_train)
128
129 pred = self._predict_oof(base_model, fold_X_test)
/kaggle/working/tabpfn_extensions/rf_pfn/SklearnBasedRandomForestTabPFN.py in fit(self, X, y, sample_weight)
74 if torch.is_tensor(y):
75 y = y.numpy()
---> 76 super().fit(X, y)
77 except TypeError as e:
78 print("Error in fit with data", X, y)
/usr/local/lib/python3.10/dist-packages/sklearn/ensemble/_forest.py in fit(self, X, y, sample_weight)
343 if issparse(y):
344 raise ValueError("sparse multilabel-indicator for y is not supported.")
--> 345 X, y = self._validate_data(
346 X, y, multi_output=True, accept_sparse="csc", dtype=DTYPE
347 )
/usr/local/lib/python3.10/dist-packages/sklearn/base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
582 y = check_array(y, input_name="y", **check_y_params)
583 else:
--> 584 X, y = check_X_y(X, y, **check_params)
585 out = X, y
586
/usr/local/lib/python3.10/dist-packages/sklearn/utils/validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
1104 )
1105
-> 1106 X = check_array(
1107 X,
1108 accept_sparse=accept_sparse,
ValueError: Input X contains NaN.
RandomForestTabPFNRegressor does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values
The text was updated successfully, but these errors were encountered:
ValueError Traceback (most recent call last)
in <cell line: 6>()
4 #clf = TabPFNRegressor()
5 clf = AutoTabPFNRegressor(max_time=30, device="cuda")
----> 6 clf.fit(X_t, y)
7 test_preds = clf.predict(X_test)
/kaggle/working/tabpfn_extensions/post_hoc_ensembles/sklearn_interface.py in fit(self, X, y, categorical_feature_indices)
220 )
221
--> 222 self.predictor_.fit(
223 X,
224 y,
/kaggle/working/tabpfn_extensions/post_hoc_ensembles/pfn_phe.py in fit(self, X, y, categorical_feature_indices)
331 )
332
--> 333 self._ens_model.fit(X, y)
334
335 return self
/kaggle/working/tabpfn_extensions/post_hoc_ensembles/greedy_weighted_ensemble.py in fit(self, X, y)
232
233 def fit(self, X, y):
--> 234 weights = self.get_weights(X, y)
235
236 final_weights = []
/kaggle/working/tabpfn_extensions/post_hoc_ensembles/greedy_weighted_ensemble.py in get_weights(self, X, y)
171
172 def get_weights(self, X, y):
--> 173 oof_proba = self.get_oof_per_estimator(X, y)
174 self.model_family_per_estimator = (
175 self.model_family_per_estimator
/kaggle/working/tabpfn_extensions/post_hoc_ensembles/abstract_validation_utils.py in get_oof_per_estimator(self, X, y, return_loss_per_estimator, impute_dropped_instances, _extra_processing)
370 holdout_index_hit_counts = current_repeat
371
--> 372 self._fill_predictions_in_place(
373 model_i=model_i,
374 base_model=base_model,
/kaggle/working/tabpfn_extensions/post_hoc_ensembles/abstract_validation_utils.py in _fill_predictions_in_place(self, model_i, base_model, oof_proba_list, X, y, train_index, test_index, loss_per_estimator, holdout_index_hits, _extra_processing, split_i)
125
126 # Default base models case
--> 127 base_model.fit(fold_X_train, fold_y_train)
128
129 pred = self._predict_oof(base_model, fold_X_test)
/kaggle/working/tabpfn_extensions/rf_pfn/SklearnBasedRandomForestTabPFN.py in fit(self, X, y, sample_weight)
74 if torch.is_tensor(y):
75 y = y.numpy()
---> 76 super().fit(X, y)
77 except TypeError as e:
78 print("Error in fit with data", X, y)
/usr/local/lib/python3.10/dist-packages/sklearn/ensemble/_forest.py in fit(self, X, y, sample_weight)
343 if issparse(y):
344 raise ValueError("sparse multilabel-indicator for y is not supported.")
--> 345 X, y = self._validate_data(
346 X, y, multi_output=True, accept_sparse="csc", dtype=DTYPE
347 )
/usr/local/lib/python3.10/dist-packages/sklearn/base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
582 y = check_array(y, input_name="y", **check_y_params)
583 else:
--> 584 X, y = check_X_y(X, y, **check_params)
585 out = X, y
586
/usr/local/lib/python3.10/dist-packages/sklearn/utils/validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
1104 )
1105
-> 1106 X = check_array(
1107 X,
1108 accept_sparse=accept_sparse,
/usr/local/lib/python3.10/dist-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
919
920 if force_all_finite:
--> 921 _assert_all_finite(
922 array,
923 input_name=input_name,
/usr/local/lib/python3.10/dist-packages/sklearn/utils/validation.py in _assert_all_finite(X, allow_nan, msg_dtype, estimator_name, input_name)
159 "#estimators-that-handle-nan-values"
160 )
--> 161 raise ValueError(msg_err)
162
163
ValueError: Input X contains NaN.
RandomForestTabPFNRegressor does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values
The text was updated successfully, but these errors were encountered: