diff --git a/openml/__init__.py b/openml/__init__.py
index 9a457c146..47bc86b4d 100644
--- a/openml/__init__.py
+++ b/openml/__init__.py
@@ -35,6 +35,7 @@
utils,
)
from .__version__ import __version__
+from ._api import _backend
from .datasets import OpenMLDataFeature, OpenMLDataset
from .evaluations import OpenMLEvaluation
from .flows import OpenMLFlow
@@ -116,6 +117,7 @@ def populate_cache(
"OpenMLTask",
"__version__",
"_api_calls",
+ "_backend",
"config",
"datasets",
"evaluations",
diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py
new file mode 100644
index 000000000..7766016d1
--- /dev/null
+++ b/openml/_api/__init__.py
@@ -0,0 +1,85 @@
+from .clients import (
+ HTTPCache,
+ HTTPClient,
+ MinIOClient,
+)
+from .resources import (
+ API_REGISTRY,
+ DatasetAPI,
+ DatasetV1API,
+ DatasetV2API,
+ EstimationProcedureAPI,
+ EstimationProcedureV1API,
+ EstimationProcedureV2API,
+ EvaluationAPI,
+ EvaluationMeasureAPI,
+ EvaluationMeasureV1API,
+ EvaluationMeasureV2API,
+ EvaluationV1API,
+ EvaluationV2API,
+ FallbackProxy,
+ FlowAPI,
+ FlowV1API,
+ FlowV2API,
+ ResourceAPI,
+ ResourceV1API,
+ ResourceV2API,
+ RunAPI,
+ RunV1API,
+ RunV2API,
+ SetupAPI,
+ SetupV1API,
+ SetupV2API,
+ StudyAPI,
+ StudyV1API,
+ StudyV2API,
+ TaskAPI,
+ TaskV1API,
+ TaskV2API,
+)
+from .setup import (
+ APIBackend,
+ APIBackendBuilder,
+ _backend,
+)
+
+__all__ = [
+ "API_REGISTRY",
+ "APIBackend",
+ "APIBackendBuilder",
+ "DatasetAPI",
+ "DatasetV1API",
+ "DatasetV2API",
+ "EstimationProcedureAPI",
+ "EstimationProcedureV1API",
+ "EstimationProcedureV2API",
+ "EvaluationAPI",
+ "EvaluationMeasureAPI",
+ "EvaluationMeasureV1API",
+ "EvaluationMeasureV2API",
+ "EvaluationV1API",
+ "EvaluationV2API",
+ "FallbackProxy",
+ "FlowAPI",
+ "FlowV1API",
+ "FlowV2API",
+ "HTTPCache",
+ "HTTPClient",
+ "MinIOClient",
+ "ResourceAPI",
+ "ResourceV1API",
+ "ResourceV2API",
+ "RunAPI",
+ "RunV1API",
+ "RunV2API",
+ "SetupAPI",
+ "SetupV1API",
+ "SetupV2API",
+ "StudyAPI",
+ "StudyV1API",
+ "StudyV2API",
+ "TaskAPI",
+ "TaskV1API",
+ "TaskV2API",
+ "_backend",
+]
diff --git a/openml/_api/clients/__init__.py b/openml/_api/clients/__init__.py
new file mode 100644
index 000000000..42f11fbcf
--- /dev/null
+++ b/openml/_api/clients/__init__.py
@@ -0,0 +1,8 @@
+from .http import HTTPCache, HTTPClient
+from .minio import MinIOClient
+
+__all__ = [
+ "HTTPCache",
+ "HTTPClient",
+ "MinIOClient",
+]
diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py
new file mode 100644
index 000000000..08db3317b
--- /dev/null
+++ b/openml/_api/clients/http.py
@@ -0,0 +1,811 @@
+from __future__ import annotations
+
+import hashlib
+import json
+import logging
+import math
+import random
+import time
+import xml
+from collections.abc import Callable, Mapping
+from pathlib import Path
+from typing import Any, cast
+from urllib.parse import urlencode, urljoin, urlparse
+
+import requests
+import xmltodict
+from requests import Response
+
+import openml
+from openml.enums import APIVersion, RetryPolicy
+from openml.exceptions import (
+ OpenMLAuthenticationError,
+ OpenMLHashException,
+ OpenMLServerError,
+ OpenMLServerException,
+ OpenMLServerNoResult,
+)
+
+
+class HTTPCache:
+ """
+ Filesystem-based cache for HTTP responses.
+
+ This class stores HTTP responses on disk using a structured directory layout
+ derived from the request URL and parameters. Each cached response consists of
+ three files: metadata (``meta.json``), headers (``headers.json``), and the raw
+ body (``body.bin``).
+
+ Notes
+ -----
+ The cache key is derived from the URL (domain and path components) and query
+ parameters, excluding the ``api_key`` parameter.
+ """
+
+ @property
+ def path(self) -> Path:
+ return Path(openml.config.get_cache_directory())
+
+ def get_key(self, url: str, params: dict[str, Any]) -> str:
+ """
+ Generate a filesystem-safe cache key for a request.
+
+ The key is constructed from the reversed domain components, URL path
+ segments, and URL-encoded query parameters (excluding ``api_key``).
+
+ Parameters
+ ----------
+ url : str
+ The full request URL.
+ params : dict of str to Any
+ Query parameters associated with the request.
+
+ Returns
+ -------
+ str
+ A relative path string representing the cache key.
+ """
+ parsed_url = urlparse(url)
+ netloc_parts = parsed_url.netloc.split(".")[::-1]
+ path_parts = parsed_url.path.strip("/").split("/")
+
+ filtered_params = {k: v for k, v in params.items() if k != "api_key"}
+ params_part = [urlencode(filtered_params)] if filtered_params else []
+
+ return str(Path(*netloc_parts, *path_parts, *params_part))
+
+ def _key_to_path(self, key: str) -> Path:
+ """
+ Convert a cache key into an absolute filesystem path.
+
+ Parameters
+ ----------
+ key : str
+ Cache key as returned by :meth:`get_key`.
+
+ Returns
+ -------
+ pathlib.Path
+ Absolute path corresponding to the cache entry.
+ """
+ return self.path.joinpath(key)
+
+ def load(self, key: str) -> Response:
+ """
+ Load a cached HTTP response from disk.
+
+ Parameters
+ ----------
+ key : str
+ Cache key identifying the stored response.
+
+ Returns
+ -------
+ requests.Response
+ Reconstructed response object with status code, headers, body, and metadata.
+
+ Raises
+ ------
+ FileNotFoundError
+ If the cache entry or required files are missing.
+ ValueError or KeyError
+ If cached JSON is malformed or a required metadata field is missing.
+ """
+ path = self._key_to_path(key)
+
+ if not path.exists():
+ raise FileNotFoundError(f"Cache entry not found: {path}")
+
+ meta_path = path / "meta.json"
+ headers_path = path / "headers.json"
+ body_path = path / "body.bin"
+
+ if not (meta_path.exists() and headers_path.exists() and body_path.exists()):
+ raise FileNotFoundError(f"Incomplete cache at {path}")
+
+ with meta_path.open("r", encoding="utf-8") as f:
+ meta = json.load(f)
+
+ with headers_path.open("r", encoding="utf-8") as f:
+ headers = json.load(f)
+
+ body = body_path.read_bytes()
+
+ response = Response()
+ response.status_code = meta["status_code"]
+ response.url = meta["url"]
+ response.reason = meta["reason"]
+ response.headers = headers
+ response._content = body
+ response.encoding = meta["encoding"]
+
+ return response
+
+ def save(self, key: str, response: Response) -> None:
+ """
+ Persist an HTTP response to disk.
+
+ Parameters
+ ----------
+ key : str
+ Cache key identifying where to store the response.
+ response : requests.Response
+ Response object to cache.
+
+ Notes
+ -----
+ The response body is stored as binary data. Headers and metadata
+ (status code, URL, reason, encoding, request info, and creation
+ timestamp) are stored as JSON.
+ """
+ path = self._key_to_path(key)
+ path.mkdir(parents=True, exist_ok=True)
+
+ (path / "body.bin").write_bytes(response.content)
+
+ with (path / "headers.json").open("w", encoding="utf-8") as f:
+ json.dump(dict(response.headers), f)
+
+ meta = {
+ "status_code": response.status_code,
+ "url": response.url,
+ "reason": response.reason,
+ "encoding": response.encoding,
+ "created_at": time.time(),
+ "request": {
+ "method": response.request.method if response.request else None,
+ "url": response.request.url if response.request else None,
+ "headers": dict(response.request.headers) if response.request else None,
+ "body": response.request.body if response.request else None,
+ },
+ }
+
+ with (path / "meta.json").open("w", encoding="utf-8") as f:
+ json.dump(meta, f)
+
+
+class HTTPClient:
+ """
+ HTTP client for interacting with the OpenML API.
+
+ This client supports configurable retry policies, optional filesystem
+ caching, API key authentication, and response validation including
+ checksum verification.
+
+ Parameters
+ ----------
+ api_version : APIVersion
+ Backend API Version.
+ """
+
+ def __init__(
+ self,
+ *,
+ api_version: APIVersion,
+ ) -> None:
+ self.api_version = api_version
+
+ self.cache = HTTPCache()
+
+ @property
+ def server(self) -> str:
+ server = openml.config.servers[self.api_version]["server"]
+ if server is None:
+ servers_repr = {k.value: v for k, v in openml.config.servers.items()}
+ raise ValueError(
+ f'server found to be None for api_version="{self.api_version}" in {servers_repr}'
+ )
+ return cast("str", server)
+
+ @property
+ def api_key(self) -> str | None:
+ return cast("str | None", openml.config.servers[self.api_version]["apikey"])
+
+ @property
+ def retries(self) -> int:
+ return cast("int", openml.config.connection_n_retries)
+
+ @property
+ def retry_policy(self) -> RetryPolicy:
+ return RetryPolicy.HUMAN if openml.config.retry_policy == "human" else RetryPolicy.ROBOT
+
+ @property
+ def retry_func(self) -> Callable:
+ return self._human_delay if self.retry_policy == RetryPolicy.HUMAN else self._robot_delay
+
+ def _robot_delay(self, n: int) -> float:
+ """
+ Compute delay for automated retry policy.
+
+ Parameters
+ ----------
+ n : int
+ Current retry attempt number (1-based).
+
+ Returns
+ -------
+ float
+ Number of seconds to wait before the next retry.
+
+ Notes
+ -----
+ Uses a sigmoid-based growth curve with Gaussian noise to gradually
+ increase waiting time.
+ """
+ wait = (1 / (1 + math.exp(-(n * 0.5 - 4)))) * 60
+ variation = random.gauss(0, wait / 10)
+ return max(1.0, wait + variation)
+
+ def _human_delay(self, n: int) -> float:
+ """
+ Compute delay for human-like retry policy.
+
+ Parameters
+ ----------
+ n : int
+ Current retry attempt number (1-based).
+
+ Returns
+ -------
+ float
+ Number of seconds to wait before the next retry.
+ """
+ return max(1.0, n)
+
+ def _parse_exception_response(
+ self,
+ response: Response,
+ ) -> tuple[int | None, str]:
+ """
+ Parse an error response returned by the server.
+
+ Parameters
+ ----------
+ response : requests.Response
+ HTTP response containing error details in JSON or XML format.
+
+ Returns
+ -------
+ tuple of (int or None, str)
+ Parsed error code and combined error message. The code may be
+ ``None`` if unavailable.
+ """
+ content_type = response.headers.get("Content-Type", "").lower()
+
+ if "application/json" in content_type:
+ server_exception = response.json()
+ server_error = server_exception["detail"]
+ code = server_error.get("code")
+ message = server_error.get("message")
+ additional_information = server_error.get("additional_information")
+ else:
+ server_exception = xmltodict.parse(response.text)
+ server_error = server_exception["oml:error"]
+ code = server_error.get("oml:code")
+ message = server_error.get("oml:message")
+ additional_information = server_error.get("oml:additional_information")
+
+ if code is not None:
+ code = int(code)
+
+ if message and additional_information:
+ full_message = f"{message} - {additional_information}"
+ else:
+ full_message = message or additional_information or ""
+
+ return code, full_message
+
+ def _raise_code_specific_error(
+ self,
+ code: int,
+ message: str,
+ url: str,
+ files: Mapping[str, Any] | None,
+ ) -> None:
+ """
+ Raise specialized exceptions based on OpenML error codes.
+
+ Parameters
+ ----------
+ code : int
+ Server-provided error code.
+ message : str
+ Parsed error message.
+ url : str
+ Request URL associated with the error.
+ files : Mapping of str to Any or None
+ Files sent with the request, if any.
+
+ Raises
+ ------
+ OpenMLServerNoResult
+ If the error indicates a missing resource.
+ OpenMLServerException
+ For every other error code except 107 (database connection error).
+ Code 107 does not raise here so that the caller can retry; code 163
+ additionally prepends the uploaded flow description to the message.
+ """
+ if code in [111, 372, 512, 500, 482, 542, 674]:
+ # 512 for runs, 372 for datasets, 500 for flows
+ # 482 for tasks, 542 for evaluations, 674 for setups
+ # 111 for dataset descriptions
+ raise OpenMLServerNoResult(code=code, message=message, url=url)
+
+ # 163: failure to validate flow XML (https://www.openml.org/api_docs#!/flow/post_flow)
+ if code == 163 and files is not None and "description" in files:
+ # file_elements['description'] is the XML file description of the flow
+ message = f"\n{files['description']}\n{message}"
+
+ # Propagate all server errors to the calling functions, except
+ # for 107 which represents a database connection error.
+ # These are typically caused by high server load,
+ # which means trying again might resolve the issue.
+ # DATABASE_CONNECTION_ERRCODE
+ if code != 107:
+ raise OpenMLServerException(code=code, message=message, url=url)
+
+ def _validate_response(
+ self,
+ method: str,
+ url: str,
+ files: Mapping[str, Any] | None,
+ response: Response,
+ ) -> Exception | None:
+ """
+ Validate an HTTP response and determine whether to retry.
+
+ Parameters
+ ----------
+ method : str
+ HTTP method used for the request.
+ url : str
+ Full request URL.
+ files : Mapping of str to Any or None
+ Files sent with the request, if any.
+ response : requests.Response
+ Received HTTP response.
+
+ Returns
+ -------
+ Exception or None
+ ``None`` if the response is valid. Otherwise, an exception
+ indicating the error to raise or retry.
+
+ Raises
+ ------
+ OpenMLServerError
+ For unexpected server errors or malformed responses.
+ """
+ if (
+ "Content-Encoding" not in response.headers
+ or response.headers["Content-Encoding"] != "gzip"
+ ):
+ logging.warning(f"Received uncompressed content from OpenML for {url}.")
+
+ if response.status_code == 200:
+ return None
+
+ if response.status_code == requests.codes.URI_TOO_LONG:
+ raise OpenMLServerError(f"URI too long! ({url})")
+
+ exception: Exception | None = None
+ code: int | None = None
+ message: str = ""
+
+ try:
+ code, message = self._parse_exception_response(response)
+
+ except (requests.exceptions.JSONDecodeError, xml.parsers.expat.ExpatError) as e:
+ if method != "GET":
+ extra = f"Status code: {response.status_code}\n{response.text}"
+ raise OpenMLServerError(
+ f"Unexpected server error when calling {url}. Please contact the "
+ f"developers!\n{extra}"
+ ) from e
+
+ exception = e
+
+ except Exception as e:
+ # If we failed to parse it out,
+ # then something has gone wrong in the body we have sent back
+ # from the server and there is little extra information we can capture.
+ raise OpenMLServerError(
+ f"Unexpected server error when calling {url}. Please contact the developers!\n"
+ f"Status code: {response.status_code}\n{response.text}",
+ ) from e
+
+ if code is not None:
+ self._raise_code_specific_error(
+ code=code,
+ message=message,
+ url=url,
+ files=files,
+ )
+
+ if exception is None:
+ exception = OpenMLServerException(code=code, message=message, url=url)
+
+ return exception
+
+ def __request( # noqa: PLR0913
+ self,
+ session: requests.Session,
+ method: str,
+ url: str,
+ params: Mapping[str, Any],
+ data: Mapping[str, Any],
+ headers: Mapping[str, str],
+ files: Mapping[str, Any] | None,
+ **request_kwargs: Any,
+ ) -> tuple[Response | None, Exception | None]:
+ """
+ Execute a single HTTP request attempt.
+
+ Parameters
+ ----------
+ session : requests.Session
+ Active session used to send the request.
+ method : str
+ HTTP method (e.g., ``GET``, ``POST``).
+ url : str
+ Full request URL.
+ params : Mapping of str to Any
+ Query parameters.
+ data : Mapping of str to Any
+ Request body data.
+ headers : Mapping of str to str
+ HTTP headers.
+ files : Mapping of str to Any or None
+ Files to upload.
+ **request_kwargs : Any
+ Additional arguments forwarded to ``requests.Session.request``.
+
+ Returns
+ -------
+ tuple of (requests.Response or None, Exception or None)
+ Response and potential retry exception.
+ """
+ exception: Exception | None = None
+ response: Response | None = None
+
+ try:
+ response = session.request(
+ method=method,
+ url=url,
+ params=params,
+ data=data,
+ headers=headers,
+ files=files,
+ **request_kwargs,
+ )
+ except (
+ requests.exceptions.ChunkedEncodingError,
+ requests.exceptions.ConnectionError,
+ requests.exceptions.SSLError,
+ ) as e:
+ exception = e
+
+ if response is not None:
+ exception = self._validate_response(
+ method=method,
+ url=url,
+ files=files,
+ response=response,
+ )
+
+ return response, exception
+
+ def _request( # noqa: PLR0913, C901
+ self,
+ method: str,
+ path: str,
+ *,
+ enable_cache: bool = False,
+ refresh_cache: bool = False,
+ use_api_key: bool = False,
+ md5_checksum: str | None = None,
+ **request_kwargs: Any,
+ ) -> Response:
+ """
+ Send an HTTP request with retry, caching, and validation support.
+
+ Parameters
+ ----------
+ method : str
+ HTTP method to use.
+ path : str
+ API path relative to the base URL.
+ enable_cache : bool, optional
+ Whether to load/store response from cache.
+ refresh_cache : bool, optional
+ Only used when `enable_cache=True`. If True, ignore any existing
+ cached response and overwrite it with a fresh one.
+ use_api_key : bool, optional
+ Whether to include the API key in query parameters.
+ md5_checksum : str or None, optional
+ Expected MD5 checksum of the response body.
+ **request_kwargs : Any
+ Additional arguments passed to the underlying request.
+
+ Returns
+ -------
+ requests.Response
+ Final validated response.
+
+ Raises
+ ------
+ Exception
+ Propagates network, validation, or server exceptions after retries.
+ OpenMLHashException
+ If checksum verification fails.
+ """
+ url = urljoin(self.server, path)
+ retries = max(1, self.retries)
+
+ params = request_kwargs.pop("params", {}).copy()
+ data = request_kwargs.pop("data", {}).copy()
+
+ if use_api_key:
+ if self.api_key is None:
+ raise OpenMLAuthenticationError(
+ message=(
+ f"The API call {url} requires authentication via an API key. "
+ "Please configure OpenML-Python to use your API "
+ "as described in this example: "
+ "https://openml.github.io/openml-python/latest/examples/Basics/introduction_tutorial/#authentication"
+ )
+ )
+ params["api_key"] = self.api_key
+
+ if method.upper() in {"POST", "PUT", "PATCH"}:
+ data = {**params, **data}
+ params = {}
+
+ # prepare headers
+ headers = request_kwargs.pop("headers", {}).copy()
+ headers.update(openml.config._HEADERS)
+
+ files = request_kwargs.pop("files", None)
+
+ if enable_cache and not refresh_cache:
+ cache_key = self.cache.get_key(url, params)
+ try:
+ return self.cache.load(cache_key)
+ except FileNotFoundError:
+ pass # cache miss, continue
+ except Exception:
+ raise # propagate unexpected cache errors
+
+ with requests.Session() as session:
+ for retry_counter in range(1, retries + 1):
+ response, exception = self.__request(
+ session=session,
+ method=method,
+ url=url,
+ params=params,
+ data=data,
+ headers=headers,
+ files=files,
+ **request_kwargs,
+ )
+
+ # executed successfully
+ if exception is None:
+ break
+ # tries completed
+ if retry_counter >= retries:
+ raise exception
+
+ delay = self.retry_func(retry_counter)
+ time.sleep(delay)
+
+ # response is guaranteed to be not `None`
+ # otherwise an exception would have been raised before
+ response = cast("Response", response)
+
+ if md5_checksum is not None:
+ self._verify_checksum(response, md5_checksum)
+
+ if enable_cache:
+ cache_key = self.cache.get_key(url, params)
+ self.cache.save(cache_key, response)
+
+ return response
+
+ def _verify_checksum(self, response: Response, md5_checksum: str) -> None:
+ """
+ Verify MD5 checksum of a response body.
+
+ Parameters
+ ----------
+ response : requests.Response
+ HTTP response whose content should be verified.
+ md5_checksum : str
+ Expected hexadecimal MD5 checksum.
+
+ Raises
+ ------
+ OpenMLHashException
+ If the computed checksum does not match the expected value.
+ """
+ # ruff sees hashlib.md5 as insecure
+ actual = hashlib.md5(response.content).hexdigest() # noqa: S324
+ if actual != md5_checksum:
+ raise OpenMLHashException(
+ f"Checksum of downloaded file is unequal to the expected checksum {md5_checksum} "
+ f"when downloading {response.url}.",
+ )
+
+ def get(
+ self,
+ path: str,
+ *,
+ enable_cache: bool = False,
+ refresh_cache: bool = False,
+ use_api_key: bool = False,
+ md5_checksum: str | None = None,
+ **request_kwargs: Any,
+ ) -> Response:
+ """
+ Send a GET request.
+
+ Parameters
+ ----------
+ path : str
+ API path relative to the base URL.
+ enable_cache : bool, optional
+ Whether to use the response cache.
+ refresh_cache : bool, optional
+ Whether to ignore existing cached entries.
+ use_api_key : bool, optional
+ Whether to include the API key.
+ md5_checksum : str or None, optional
+ Expected MD5 checksum for response validation.
+ **request_kwargs : Any
+ Additional request arguments.
+
+ Returns
+ -------
+ requests.Response
+ HTTP response.
+ """
+ return self._request(
+ method="GET",
+ path=path,
+ enable_cache=enable_cache,
+ refresh_cache=refresh_cache,
+ use_api_key=use_api_key,
+ md5_checksum=md5_checksum,
+ **request_kwargs,
+ )
+
+ def post(
+ self,
+ path: str,
+ *,
+ use_api_key: bool = True,
+ **request_kwargs: Any,
+ ) -> Response:
+ """
+ Send a POST request.
+
+ Parameters
+ ----------
+ path : str
+ API path relative to the base URL.
+ use_api_key : bool, optional
+ Whether to include the API key.
+ **request_kwargs : Any
+ Additional request arguments.
+
+ Returns
+ -------
+ requests.Response
+ HTTP response.
+ """
+ return self._request(
+ method="POST",
+ path=path,
+ enable_cache=False,
+ use_api_key=use_api_key,
+ **request_kwargs,
+ )
+
+ def delete(
+ self,
+ path: str,
+ **request_kwargs: Any,
+ ) -> Response:
+ """
+ Send a DELETE request.
+
+ Parameters
+ ----------
+ path : str
+ API path relative to the base URL.
+ **request_kwargs : Any
+ Additional request arguments.
+
+ Returns
+ -------
+ requests.Response
+ HTTP response.
+ """
+ return self._request(
+ method="DELETE",
+ path=path,
+ enable_cache=False,
+ use_api_key=True,
+ **request_kwargs,
+ )
+
+ def download(
+ self,
+ url: str,
+ handler: Callable[[Response, Path, str], None] | None = None,
+ encoding: str = "utf-8",
+ file_name: str = "response.txt",
+ md5_checksum: str | None = None,
+ ) -> Path:
+ """
+ Download a resource and store it in the cache directory.
+
+ Parameters
+ ----------
+ url : str
+ Absolute URL of the resource to download.
+ handler : callable or None, optional
+ Custom handler function accepting ``(response, path, encoding)``
+ and returning ``None``; it must write the response to ``path`` itself.
+ encoding : str, optional
+ Text encoding used when writing the response body.
+ file_name : str, optional
+ Name of the saved file.
+ md5_checksum : str or None, optional
+ Expected MD5 checksum for integrity verification.
+
+ Returns
+ -------
+ pathlib.Path
+ Path to the downloaded file.
+
+ Raises
+ ------
+ OpenMLHashException
+ If checksum verification fails.
+ """
+ base = self.cache.path
+ file_path = base / "downloads" / urlparse(url).path.lstrip("/") / file_name
+ file_path = file_path.expanduser()
+ file_path.parent.mkdir(parents=True, exist_ok=True)
+ if file_path.exists():
+ return file_path
+
+ response = self.get(url, md5_checksum=md5_checksum)
+
+ def write_to_file(response: Response, path: Path, encoding: str) -> None:
+ path.write_text(response.text, encoding)
+
+ handler = handler or write_to_file
+ handler(response, file_path, encoding)
+ return file_path
diff --git a/openml/_api/clients/minio.py b/openml/_api/clients/minio.py
new file mode 100644
index 000000000..920b485e0
--- /dev/null
+++ b/openml/_api/clients/minio.py
@@ -0,0 +1,28 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+import openml
+
+
+class MinIOClient:
+ """
+ Lightweight client configuration for interacting with a MinIO-compatible
+ object storage service.
+
+ This class currently exposes only a base filesystem path, which is read
+ from the OpenML configuration on every access. It is intended to be
+ extended with actual request or storage logic elsewhere.
+
+ Attributes
+ ----------
+ path : pathlib.Path
+ Base path for storage operations. It is resolved from
+ ``openml.config.get_cache_directory()`` each time it is read, so
+ later changes to the configured cache directory are picked up.
+ No default HTTP headers are defined on this class.
+ """
+
+ @property
+ def path(self) -> Path:
+ return Path(openml.config.get_cache_directory())
diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py
new file mode 100644
index 000000000..6d957966e
--- /dev/null
+++ b/openml/_api/resources/__init__.py
@@ -0,0 +1,63 @@
+from ._registry import API_REGISTRY
+from .base import (
+ DatasetAPI,
+ EstimationProcedureAPI,
+ EvaluationAPI,
+ EvaluationMeasureAPI,
+ FallbackProxy,
+ FlowAPI,
+ ResourceAPI,
+ ResourceV1API,
+ ResourceV2API,
+ RunAPI,
+ SetupAPI,
+ StudyAPI,
+ TaskAPI,
+)
+from .dataset import DatasetV1API, DatasetV2API
+from .estimation_procedure import (
+ EstimationProcedureV1API,
+ EstimationProcedureV2API,
+)
+from .evaluation import EvaluationV1API, EvaluationV2API
+from .evaluation_measure import EvaluationMeasureV1API, EvaluationMeasureV2API
+from .flow import FlowV1API, FlowV2API
+from .run import RunV1API, RunV2API
+from .setup import SetupV1API, SetupV2API
+from .study import StudyV1API, StudyV2API
+from .task import TaskV1API, TaskV2API
+
+__all__ = [
+ "API_REGISTRY",
+ "DatasetAPI",
+ "DatasetV1API",
+ "DatasetV2API",
+ "EstimationProcedureAPI",
+ "EstimationProcedureV1API",
+ "EstimationProcedureV2API",
+ "EvaluationAPI",
+ "EvaluationMeasureAPI",
+ "EvaluationMeasureV1API",
+ "EvaluationMeasureV2API",
+ "EvaluationV1API",
+ "EvaluationV2API",
+ "FallbackProxy",
+ "FlowAPI",
+ "FlowV1API",
+ "FlowV2API",
+ "ResourceAPI",
+ "ResourceV1API",
+ "ResourceV2API",
+ "RunAPI",
+ "RunV1API",
+ "RunV2API",
+ "SetupAPI",
+ "SetupV1API",
+ "SetupV2API",
+ "StudyAPI",
+ "StudyV1API",
+ "StudyV2API",
+ "TaskAPI",
+ "TaskV1API",
+ "TaskV2API",
+]
diff --git a/openml/_api/resources/_registry.py b/openml/_api/resources/_registry.py
new file mode 100644
index 000000000..66d7ec428
--- /dev/null
+++ b/openml/_api/resources/_registry.py
@@ -0,0 +1,49 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from openml.enums import APIVersion, ResourceType
+
+from .dataset import DatasetV1API, DatasetV2API
+from .estimation_procedure import (
+ EstimationProcedureV1API,
+ EstimationProcedureV2API,
+)
+from .evaluation import EvaluationV1API, EvaluationV2API
+from .evaluation_measure import EvaluationMeasureV1API, EvaluationMeasureV2API
+from .flow import FlowV1API, FlowV2API
+from .run import RunV1API, RunV2API
+from .setup import SetupV1API, SetupV2API
+from .study import StudyV1API, StudyV2API
+from .task import TaskV1API, TaskV2API
+
+if TYPE_CHECKING:
+ from .base import ResourceAPI
+
+API_REGISTRY: dict[
+ APIVersion,
+ dict[ResourceType, type[ResourceAPI]],
+] = {
+ APIVersion.V1: {
+ ResourceType.DATASET: DatasetV1API,
+ ResourceType.TASK: TaskV1API,
+ ResourceType.EVALUATION_MEASURE: EvaluationMeasureV1API,
+ ResourceType.ESTIMATION_PROCEDURE: EstimationProcedureV1API,
+ ResourceType.EVALUATION: EvaluationV1API,
+ ResourceType.FLOW: FlowV1API,
+ ResourceType.STUDY: StudyV1API,
+ ResourceType.RUN: RunV1API,
+ ResourceType.SETUP: SetupV1API,
+ },
+ APIVersion.V2: {
+ ResourceType.DATASET: DatasetV2API,
+ ResourceType.TASK: TaskV2API,
+ ResourceType.EVALUATION_MEASURE: EvaluationMeasureV2API,
+ ResourceType.ESTIMATION_PROCEDURE: EstimationProcedureV2API,
+ ResourceType.EVALUATION: EvaluationV2API,
+ ResourceType.FLOW: FlowV2API,
+ ResourceType.STUDY: StudyV2API,
+ ResourceType.RUN: RunV2API,
+ ResourceType.SETUP: SetupV2API,
+ },
+}
diff --git a/openml/_api/resources/base/__init__.py b/openml/_api/resources/base/__init__.py
new file mode 100644
index 000000000..ed6dc26f7
--- /dev/null
+++ b/openml/_api/resources/base/__init__.py
@@ -0,0 +1,30 @@
+from .base import ResourceAPI
+from .fallback import FallbackProxy
+from .resources import (
+ DatasetAPI,
+ EstimationProcedureAPI,
+ EvaluationAPI,
+ EvaluationMeasureAPI,
+ FlowAPI,
+ RunAPI,
+ SetupAPI,
+ StudyAPI,
+ TaskAPI,
+)
+from .versions import ResourceV1API, ResourceV2API
+
+__all__ = [
+ "DatasetAPI",
+ "EstimationProcedureAPI",
+ "EvaluationAPI",
+ "EvaluationMeasureAPI",
+ "FallbackProxy",
+ "FlowAPI",
+ "ResourceAPI",
+ "ResourceV1API",
+ "ResourceV2API",
+ "RunAPI",
+ "SetupAPI",
+ "StudyAPI",
+ "TaskAPI",
+]
diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py
new file mode 100644
index 000000000..625681e3b
--- /dev/null
+++ b/openml/_api/resources/base/base.py
@@ -0,0 +1,236 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING, NoReturn
+
+from openml.exceptions import (
+ OpenMLNotAuthorizedError,
+ OpenMLNotSupportedError,
+ OpenMLServerError,
+ OpenMLServerException,
+)
+
+if TYPE_CHECKING:
+ from collections.abc import Mapping
+ from typing import Any
+
+ from openml._api.clients import HTTPClient, MinIOClient
+ from openml.enums import APIVersion, ResourceType
+
+
+class ResourceAPI(ABC):
+ """
+ Abstract base class for OpenML resource APIs.
+
+ This class defines the common interface for interacting with OpenML
+ resources (e.g., datasets, flows, runs) across different API versions.
+ Concrete subclasses must implement the resource-specific operations
+ such as publishing, deleting, and tagging.
+
+ Parameters
+ ----------
+ http : HTTPClient
+ Configured HTTP client used for communication with the OpenML API.
+ minio : MinIOClient
+ Configured MinIO client used for object storage operations.
+
+ Attributes
+ ----------
+ api_version : APIVersion
+ API version implemented by the resource.
+ resource_type : ResourceType
+ Type of OpenML resource handled by the implementation.
+ _http : HTTPClient
+ Internal HTTP client instance.
+ _minio : MinIOClient
+ Internal MinIO client instance.
+ """
+
+ api_version: APIVersion
+ resource_type: ResourceType
+
+ def __init__(self, http: HTTPClient, minio: MinIOClient):
+ self._http = http
+ self._minio = minio
+
+ @abstractmethod
+ def delete(self, resource_id: int) -> bool:
+ """
+ Delete a resource by its identifier.
+
+ Parameters
+ ----------
+ resource_id : int
+ Unique identifier of the resource to delete.
+
+ Returns
+ -------
+ bool
+ ``True`` if the deletion was successful.
+
+ Notes
+ -----
+ Concrete subclasses must implement this method.
+ """
+
+ @abstractmethod
+ def publish(self, path: str, files: Mapping[str, Any] | None) -> int:
+ """
+ Publish a new resource to the OpenML server.
+
+ Parameters
+ ----------
+ path : str
+ API endpoint path used for publishing the resource.
+ files : Mapping of str to Any or None
+ Files or payload data required for publishing. The structure
+ depends on the resource type.
+
+ Returns
+ -------
+ int
+ Identifier of the newly created resource.
+
+ Notes
+ -----
+ Concrete subclasses must implement this method.
+ """
+
+ @abstractmethod
+ def tag(self, resource_id: int, tag: str) -> list[str]:
+ """
+ Add a tag to a resource.
+
+ Parameters
+ ----------
+ resource_id : int
+ Identifier of the resource to tag.
+ tag : str
+ Tag to associate with the resource.
+
+ Returns
+ -------
+ list of str
+ Updated list of tags assigned to the resource.
+
+ Notes
+ -----
+ Concrete subclasses must implement this method.
+ """
+
+ @abstractmethod
+ def untag(self, resource_id: int, tag: str) -> list[str]:
+ """
+ Remove a tag from a resource.
+
+ Parameters
+ ----------
+ resource_id : int
+ Identifier of the resource to untag.
+ tag : str
+ Tag to remove from the resource.
+
+ Returns
+ -------
+ list of str
+ Updated list of tags assigned to the resource.
+
+ Notes
+ -----
+ Concrete subclasses must implement this method.
+ """
+
+ @abstractmethod
+ def _get_endpoint_name(self) -> str:
+ """
+ Return the endpoint name for the current resource type.
+
+ Returns
+ -------
+ str
+ Endpoint segment used in API paths.
+
+ Notes
+ -----
+ Datasets use the special endpoint name ``"data"`` instead of
+ their enum value.
+ """
+
+ def _handle_delete_exception(
+ self, resource_type: str, exception: OpenMLServerException
+ ) -> None:
+ """
+ Map V1 deletion error codes to more specific exceptions.
+
+ Parameters
+ ----------
+ resource_type : str
+ Endpoint name of the resource type.
+ exception : OpenMLServerException
+ Original exception raised during deletion.
+
+ Raises
+ ------
+ OpenMLNotAuthorizedError
+ If the resource cannot be deleted due to ownership or
+ dependent entities.
+ OpenMLServerError
+ If deletion fails for an unknown reason.
+ OpenMLServerException
+ If the error code is not specially handled.
+ """
+ # https://github.com/openml/OpenML/blob/21f6188d08ac24fcd2df06ab94cf421c946971b0/openml_OS/views/pages/api_new/v1/xml/pre.php
+ # Most exceptions are descriptive enough to be raised as their standard
+ # OpenMLServerException, however there are two cases where we add information:
+ # - a generic "failed" message, we direct them to the right issue board
+ # - when the user successfully authenticates with the server,
+ # but user is not allowed to take the requested action,
+ # in which case we specify a OpenMLNotAuthorizedError.
+ by_other_user = [323, 353, 393, 453, 594]
+ has_dependent_entities = [324, 326, 327, 328, 354, 454, 464, 595]
+ unknown_reason = [325, 355, 394, 455, 593]
+ if exception.code in by_other_user:
+ raise OpenMLNotAuthorizedError(
+ message=(
+ f"The {resource_type} can not be deleted because it was not uploaded by you."
+ ),
+ ) from exception
+ if exception.code in has_dependent_entities:
+ raise OpenMLNotAuthorizedError(
+ message=(
+ f"The {resource_type} can not be deleted because "
+ f"it still has associated entities: {exception.message}"
+ ),
+ ) from exception
+ if exception.code in unknown_reason:
+ raise OpenMLServerError(
+ message=(
+ f"The {resource_type} can not be deleted for unknown reason,"
+ " please open an issue at: https://github.com/openml/openml/issues/new"
+ ),
+ ) from exception
+ raise exception
+
+ def _not_supported(self, *, method: str) -> NoReturn:
+ """
+ Raise an error indicating that a method is not supported.
+
+ Parameters
+ ----------
+ method : str
+ Name of the unsupported method.
+
+ Raises
+ ------
+ OpenMLNotSupportedError
+ If the current API version does not support the requested method
+ for the given resource type.
+ """
+ version = getattr(self.api_version, "value", "unknown")
+ resource = getattr(self.resource_type, "value", "unknown")
+
+ raise OpenMLNotSupportedError(
+ f"{self.__class__.__name__}: "
+ f"{version} API does not support `{method}` "
+ f"for resource `{resource}`"
+ )
diff --git a/openml/_api/resources/base/fallback.py b/openml/_api/resources/base/fallback.py
new file mode 100644
index 000000000..9b8f64a17
--- /dev/null
+++ b/openml/_api/resources/base/fallback.py
@@ -0,0 +1,166 @@
+from __future__ import annotations
+
+from collections.abc import Callable
+from typing import Any
+
+from openml.exceptions import OpenMLNotSupportedError
+
+
+class FallbackProxy:
+ """
+ Proxy object that provides transparent fallback across multiple API versions.
+
+ This class delegates attribute access to a sequence of API implementations.
+ When a callable attribute is invoked and raises ``OpenMLNotSupportedError``,
+ the proxy automatically attempts the same method on subsequent API instances
+ until one succeeds.
+
+ Parameters
+ ----------
+ *api_versions : Any
+ One or more API implementation instances ordered by priority.
+ The first API is treated as the primary implementation, and
+ subsequent APIs are used as fallbacks.
+
+ Raises
+ ------
+ ValueError
+ If no API implementations are provided.
+
+ Notes
+ -----
+ Attribute lookup is performed dynamically via ``__getattr__``.
+ Only methods that raise ``OpenMLNotSupportedError`` trigger fallback
+ behavior. Other exceptions are propagated immediately.
+ """
+
+ def __init__(self, *api_versions: Any):
+ if not api_versions:
+ raise ValueError("At least one API version must be provided")
+ self._apis = api_versions
+
+ def __getattr__(self, name: str) -> Any:
+ """
+ Dynamically resolve attribute access across API implementations.
+
+ Parameters
+ ----------
+ name : str
+ Name of the attribute being accessed.
+
+ Returns
+ -------
+ Any
+ The resolved attribute. If it is callable, a wrapped function
+ providing fallback behavior is returned.
+
+ Raises
+ ------
+ AttributeError
+ If no API defines the attribute, or ``_apis`` itself is not set yet.
+ """
+ if name == "_apis":  # unset during copy/unpickle; avoid infinite recursion
+ raise AttributeError(name)
+ api, attr = self._find_attr(name)
+ return self._wrap_callable(name, api, attr) if callable(attr) else attr
+
+ def _find_attr(self, name: str) -> tuple[Any, Any]:
+ """
+ Find the first API implementation that defines a given attribute.
+
+ Parameters
+ ----------
+ name : str
+ Name of the attribute to search for.
+
+ Returns
+ -------
+ tuple of (Any, Any)
+ The API instance and the corresponding attribute.
+
+ Raises
+ ------
+ AttributeError
+ If no API implementation defines the attribute.
+ """
+ for api in self._apis:
+ attr = getattr(api, name, None)
+ if attr is not None:
+ return api, attr
+ raise AttributeError(f"{self.__class__.__name__} has no attribute {name}")
+
+ def _wrap_callable(
+ self,
+ name: str,
+ primary_api: Any,
+ primary_attr: Callable[..., Any],
+ ) -> Callable[..., Any]:
+ """
+ Wrap a callable attribute to enable fallback behavior.
+
+ Parameters
+ ----------
+ name : str
+ Name of the method being wrapped.
+ primary_api : Any
+ Primary API instance providing the callable.
+ primary_attr : Callable[..., Any]
+ Callable attribute obtained from the primary API.
+
+ Returns
+ -------
+ Callable[..., Any]
+ Wrapped function that attempts the primary call first and
+ falls back to other APIs if ``OpenMLNotSupportedError`` is raised.
+ """
+
+ def wrapper(*args: Any, **kwargs: Any) -> Any:
+ try:
+ return primary_attr(*args, **kwargs)
+ except OpenMLNotSupportedError:
+ return self._call_fallbacks(name, primary_api, *args, **kwargs)
+
+ return wrapper
+
+ def _call_fallbacks(
+ self,
+ name: str,
+ skip_api: Any,
+ *args: Any,
+ **kwargs: Any,
+ ) -> Any:
+ """
+ Attempt to call a method on fallback API implementations.
+
+ Parameters
+ ----------
+ name : str
+ Name of the method to invoke.
+ skip_api : Any
+ API instance to skip (typically the primary API that already failed).
+ *args : Any
+ Positional arguments passed to the method.
+ **kwargs : Any
+ Keyword arguments passed to the method.
+
+ Returns
+ -------
+ Any
+ Result returned by the first successful fallback invocation.
+
+ Raises
+ ------
+ OpenMLNotSupportedError
+ If all API implementations either do not define the method
+ or raise ``OpenMLNotSupportedError``.
+ """
+ for api in self._apis:
+ if api is skip_api:
+ continue
+ attr = getattr(api, name, None)
+ if callable(attr):
+ try:
+ return attr(*args, **kwargs)
+ except OpenMLNotSupportedError:
+ continue
+ raise OpenMLNotSupportedError(f"Could not fallback to any API for method: {name}")
diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py
new file mode 100644
index 000000000..ede0e1034
--- /dev/null
+++ b/openml/_api/resources/base/resources.py
@@ -0,0 +1,59 @@
+from __future__ import annotations
+
+from openml.enums import ResourceType
+
+from .base import ResourceAPI
+
+
+class DatasetAPI(ResourceAPI):
+ """Abstract API interface for dataset resources."""
+
+ resource_type: ResourceType = ResourceType.DATASET
+
+
+class TaskAPI(ResourceAPI):
+ """Abstract API interface for task resources."""
+
+ resource_type: ResourceType = ResourceType.TASK
+
+
+class EvaluationMeasureAPI(ResourceAPI):
+ """Abstract API interface for evaluation measure resources."""
+
+ resource_type: ResourceType = ResourceType.EVALUATION_MEASURE
+
+
+class EstimationProcedureAPI(ResourceAPI):
+ """Abstract API interface for estimation procedure resources."""
+
+ resource_type: ResourceType = ResourceType.ESTIMATION_PROCEDURE
+
+
+class EvaluationAPI(ResourceAPI):
+ """Abstract API interface for evaluation resources."""
+
+ resource_type: ResourceType = ResourceType.EVALUATION
+
+
+class FlowAPI(ResourceAPI):
+ """Abstract API interface for flow resources."""
+
+ resource_type: ResourceType = ResourceType.FLOW
+
+
+class StudyAPI(ResourceAPI):
+ """Abstract API interface for study resources."""
+
+ resource_type: ResourceType = ResourceType.STUDY
+
+
+class RunAPI(ResourceAPI):
+ """Abstract API interface for run resources."""
+
+ resource_type: ResourceType = ResourceType.RUN
+
+
+class SetupAPI(ResourceAPI):
+ """Abstract API interface for setup resources."""
+
+ resource_type: ResourceType = ResourceType.SETUP
diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py
new file mode 100644
index 000000000..bba59b869
--- /dev/null
+++ b/openml/_api/resources/base/versions.py
@@ -0,0 +1,261 @@
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any, cast
+
+import xmltodict
+
+from openml.enums import APIVersion, ResourceType
+from openml.exceptions import (
+ OpenMLServerException,
+)
+
+from .base import ResourceAPI
+
+_LEGAL_RESOURCES_DELETE = [
+ ResourceType.DATASET,
+ ResourceType.TASK,
+ ResourceType.FLOW,
+ ResourceType.STUDY,
+ ResourceType.RUN,
+ ResourceType.USER,
+]
+
+_LEGAL_RESOURCES_TAG = [
+ ResourceType.DATASET,
+ ResourceType.TASK,
+ ResourceType.FLOW,
+ ResourceType.SETUP,
+ ResourceType.RUN,
+]
+
+
+class ResourceV1API(ResourceAPI):
+ """
+ Version 1 implementation of the OpenML resource API.
+
+ This class provides XML-based implementations for publishing,
+ deleting, tagging, and untagging resources using the V1 API
+ endpoints. Responses are parsed using ``xmltodict``.
+
+ Notes
+ -----
+ V1 endpoints expect and return XML. Error handling follows the
+ legacy OpenML server behavior and maps specific error codes to
+ more descriptive exceptions where appropriate.
+ """
+
+ api_version: APIVersion = APIVersion.V1
+
+ def publish(self, path: str, files: Mapping[str, Any] | None) -> int:
+ """
+ Publish a new resource using the V1 API.
+
+ Parameters
+ ----------
+ path : str
+ API endpoint path for the upload.
+ files : Mapping of str to Any or None
+ Files to upload as part of the request payload.
+
+ Returns
+ -------
+ int
+ Identifier of the newly created resource.
+
+ Raises
+ ------
+ ValueError
+ If the server response does not contain a valid resource ID.
+ OpenMLServerException
+ If the server returns an error during upload.
+ """
+ response = self._http.post(path, files=files)
+ parsed_response = xmltodict.parse(response.content)
+ return self._extract_id_from_upload(parsed_response)
+
+ def delete(self, resource_id: int) -> bool:
+ """
+ Delete a resource using the V1 API.
+
+ Parameters
+ ----------
+ resource_id : int
+ Identifier of the resource to delete.
+
+ Returns
+ -------
+ bool
+ ``True`` if the server confirms successful deletion.
+
+ Raises
+ ------
+ ValueError
+ If the resource type is not supported for deletion.
+ OpenMLNotAuthorizedError
+ If the user is not permitted to delete the resource.
+ OpenMLServerError
+ If deletion fails for an unknown reason.
+ OpenMLServerException
+ For other server-side errors.
+ """
+ if self.resource_type not in _LEGAL_RESOURCES_DELETE:
+ raise ValueError(f"Can't delete a {self.resource_type.value}")
+
+ endpoint_name = self._get_endpoint_name()
+ path = f"{endpoint_name}/{resource_id}"
+ try:
+ response = self._http.delete(path)
+ result = xmltodict.parse(response.content)
+ return f"oml:{endpoint_name}_delete" in result
+ except OpenMLServerException as e:
+ self._handle_delete_exception(endpoint_name, e)
+ raise
+
+ def tag(self, resource_id: int, tag: str) -> list[str]:
+ """
+ Add a tag to a resource using the V1 API.
+
+ Parameters
+ ----------
+ resource_id : int
+ Identifier of the resource to tag.
+ tag : str
+ Tag to associate with the resource.
+
+ Returns
+ -------
+ list of str
+ Updated list of tags assigned to the resource.
+
+ Raises
+ ------
+ ValueError
+ If the resource type does not support tagging.
+ OpenMLServerException
+ If the server returns an error.
+ """
+ if self.resource_type not in _LEGAL_RESOURCES_TAG:
+ raise ValueError(f"Can't tag a {self.resource_type.value}")
+
+ endpoint_name = self._get_endpoint_name()
+ path = f"{endpoint_name}/tag"
+ data = {f"{endpoint_name}_id": resource_id, "tag": tag}
+ response = self._http.post(path, data=data)
+
+ parsed_response = xmltodict.parse(response.content, force_list={"oml:tag"})
+ result = parsed_response[f"oml:{endpoint_name}_tag"]
+ tags: list[str] = result.get("oml:tag", [])
+
+ return tags
+
+ def untag(self, resource_id: int, tag: str) -> list[str]:
+ """
+ Remove a tag from a resource using the V1 API.
+
+ Parameters
+ ----------
+ resource_id : int
+ Identifier of the resource to untag.
+ tag : str
+ Tag to remove from the resource.
+
+ Returns
+ -------
+ list of str
+ Updated list of tags assigned to the resource.
+
+ Raises
+ ------
+ ValueError
+ If the resource type does not support tagging.
+ OpenMLServerException
+ If the server returns an error.
+ """
+ if self.resource_type not in _LEGAL_RESOURCES_TAG:
+ raise ValueError(f"Can't untag a {self.resource_type.value}")
+
+ endpoint_name = self._get_endpoint_name()
+ path = f"{endpoint_name}/untag"
+ data = {f"{endpoint_name}_id": resource_id, "tag": tag}
+ response = self._http.post(path, data=data)
+
+ parsed_response = xmltodict.parse(response.content, force_list={"oml:tag"})
+ result = parsed_response[f"oml:{endpoint_name}_untag"]
+ tags: list[str] = result.get("oml:tag", [])
+
+ return tags
+
+ def _get_endpoint_name(self) -> str:
+ if self.resource_type == ResourceType.DATASET:
+ return "data"
+ return cast("str", self.resource_type.value)
+
+ def _extract_id_from_upload(self, parsed: Mapping[str, Any]) -> int:
+ """
+ Extract the resource identifier from an XML upload response.
+
+ Parameters
+ ----------
+ parsed : Mapping of str to Any
+ Parsed XML response as returned by ``xmltodict.parse``.
+
+ Returns
+ -------
+ int
+ Extracted resource identifier.
+
+ Raises
+ ------
+ ValueError
+ If the response structure is unexpected or no identifier
+ can be found.
+ """
+ # reads id from upload response
+ # actual parsed dict: {"oml:upload_flow": {"@xmlns:oml": "...", "oml:id": "42"}}
+
+ # xmltodict always gives exactly one root key
+ ((_, root_value),) = parsed.items()
+
+ if not isinstance(root_value, Mapping):
+ raise ValueError("Unexpected XML structure")
+
+ # Look for oml:id directly in the root value
+ if "oml:id" in root_value:
+ id_value = root_value["oml:id"]
+ if isinstance(id_value, (str, int)):
+ return int(id_value)
+
+ # Fallback: first numeric value; skips e.g. the "@xmlns:oml" URL attribute
+ for v in root_value.values():
+ if isinstance(v, int) or (isinstance(v, str) and v.strip().isdecimal()):
+ return int(v)
+
+ raise ValueError("No ID found in upload response")
+
+
+class ResourceV2API(ResourceAPI):
+ """
+ Version 2 implementation of the OpenML resource API.
+
+ This class represents the V2 API for resources. Operations such as
+ publishing, deleting, tagging, and untagging are currently not
+ supported and will raise ``OpenMLNotSupportedError``.
+ """
+
+ api_version: APIVersion = APIVersion.V2
+
+ def publish(self, path: str, files: Mapping[str, Any] | None) -> int: # noqa: ARG002
+ self._not_supported(method="publish")
+
+ def delete(self, resource_id: int) -> bool: # noqa: ARG002
+ self._not_supported(method="delete")
+
+ def tag(self, resource_id: int, tag: str) -> list[str]: # noqa: ARG002
+ self._not_supported(method="tag")
+
+ def untag(self, resource_id: int, tag: str) -> list[str]: # noqa: ARG002
+ self._not_supported(method="untag")
+
+ def _get_endpoint_name(self) -> str:
+ return cast("str", self.resource_type.value)
diff --git a/openml/_api/resources/dataset.py b/openml/_api/resources/dataset.py
new file mode 100644
index 000000000..520594df9
--- /dev/null
+++ b/openml/_api/resources/dataset.py
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+from .base import DatasetAPI, ResourceV1API, ResourceV2API
+
+
+class DatasetV1API(ResourceV1API, DatasetAPI):
+ """Version 1 API implementation for dataset resources."""
+
+
+class DatasetV2API(ResourceV2API, DatasetAPI):
+ """Version 2 API implementation for dataset resources."""
diff --git a/openml/_api/resources/estimation_procedure.py b/openml/_api/resources/estimation_procedure.py
new file mode 100644
index 000000000..a45f7af66
--- /dev/null
+++ b/openml/_api/resources/estimation_procedure.py
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+from .base import EstimationProcedureAPI, ResourceV1API, ResourceV2API
+
+
+class EstimationProcedureV1API(ResourceV1API, EstimationProcedureAPI):
+ """Version 1 API implementation for estimation procedure resources."""
+
+
+class EstimationProcedureV2API(ResourceV2API, EstimationProcedureAPI):
+ """Version 2 API implementation for estimation procedure resources."""
diff --git a/openml/_api/resources/evaluation.py b/openml/_api/resources/evaluation.py
new file mode 100644
index 000000000..fe7e360a6
--- /dev/null
+++ b/openml/_api/resources/evaluation.py
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+from .base import EvaluationAPI, ResourceV1API, ResourceV2API
+
+
+class EvaluationV1API(ResourceV1API, EvaluationAPI):
+ """Version 1 API implementation for evaluation resources."""
+
+
+class EvaluationV2API(ResourceV2API, EvaluationAPI):
+ """Version 2 API implementation for evaluation resources."""
diff --git a/openml/_api/resources/evaluation_measure.py b/openml/_api/resources/evaluation_measure.py
new file mode 100644
index 000000000..4ed5097f7
--- /dev/null
+++ b/openml/_api/resources/evaluation_measure.py
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+from .base import EvaluationMeasureAPI, ResourceV1API, ResourceV2API
+
+
+class EvaluationMeasureV1API(ResourceV1API, EvaluationMeasureAPI):
+ """Version 1 API implementation for evaluation measure resources."""
+
+
+class EvaluationMeasureV2API(ResourceV2API, EvaluationMeasureAPI):
+ """Version 2 API implementation for evaluation measure resources."""
diff --git a/openml/_api/resources/flow.py b/openml/_api/resources/flow.py
new file mode 100644
index 000000000..1716d89d3
--- /dev/null
+++ b/openml/_api/resources/flow.py
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+from .base import FlowAPI, ResourceV1API, ResourceV2API
+
+
+class FlowV1API(ResourceV1API, FlowAPI):
+ """Version 1 API implementation for flow resources."""
+
+
+class FlowV2API(ResourceV2API, FlowAPI):
+ """Version 2 API implementation for flow resources."""
diff --git a/openml/_api/resources/run.py b/openml/_api/resources/run.py
new file mode 100644
index 000000000..4caccb0b6
--- /dev/null
+++ b/openml/_api/resources/run.py
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+from .base import ResourceV1API, ResourceV2API, RunAPI
+
+
+class RunV1API(ResourceV1API, RunAPI):
+ """Version 1 API implementation for run resources."""
+
+
+class RunV2API(ResourceV2API, RunAPI):
+ """Version 2 API implementation for run resources."""
diff --git a/openml/_api/resources/setup.py b/openml/_api/resources/setup.py
new file mode 100644
index 000000000..2896d3d9f
--- /dev/null
+++ b/openml/_api/resources/setup.py
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+from .base import ResourceV1API, ResourceV2API, SetupAPI
+
+
+class SetupV1API(ResourceV1API, SetupAPI):
+ """Version 1 API implementation for setup resources."""
+
+
+class SetupV2API(ResourceV2API, SetupAPI):
+ """Version 2 API implementation for setup resources."""
diff --git a/openml/_api/resources/study.py b/openml/_api/resources/study.py
new file mode 100644
index 000000000..fb073555c
--- /dev/null
+++ b/openml/_api/resources/study.py
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+from .base import ResourceV1API, ResourceV2API, StudyAPI
+
+
+class StudyV1API(ResourceV1API, StudyAPI):
+ """Version 1 API implementation for study resources."""
+
+
+class StudyV2API(ResourceV2API, StudyAPI):
+ """Version 2 API implementation for study resources."""
diff --git a/openml/_api/resources/task.py b/openml/_api/resources/task.py
new file mode 100644
index 000000000..1f62aa3f3
--- /dev/null
+++ b/openml/_api/resources/task.py
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+from .base import ResourceV1API, ResourceV2API, TaskAPI
+
+
+class TaskV1API(ResourceV1API, TaskAPI):
+ """Version 1 API implementation for task resources."""
+
+
+class TaskV2API(ResourceV2API, TaskAPI):
+ """Version 2 API implementation for task resources."""
diff --git a/openml/_api/setup/__init__.py b/openml/_api/setup/__init__.py
new file mode 100644
index 000000000..80545824f
--- /dev/null
+++ b/openml/_api/setup/__init__.py
@@ -0,0 +1,10 @@
+from .backend import APIBackend
+from .builder import APIBackendBuilder
+
+_backend = APIBackend.get_instance()
+
+__all__ = [
+ "APIBackend",
+ "APIBackendBuilder",
+ "_backend",
+]
diff --git a/openml/_api/setup/backend.py b/openml/_api/setup/backend.py
new file mode 100644
index 000000000..1604fd074
--- /dev/null
+++ b/openml/_api/setup/backend.py
@@ -0,0 +1,139 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, ClassVar, cast
+
+import openml
+
+from .builder import APIBackendBuilder
+
+if TYPE_CHECKING:
+ from openml._api.clients import HTTPClient, MinIOClient
+ from openml._api.resources import (
+ DatasetAPI,
+ EstimationProcedureAPI,
+ EvaluationAPI,
+ EvaluationMeasureAPI,
+ FlowAPI,
+ RunAPI,
+ SetupAPI,
+ StudyAPI,
+ TaskAPI,
+ )
+
+
+class APIBackend:
+ """
+ Central backend for accessing all OpenML API resource interfaces.
+
+ This class provides a singleton interface to dataset, task, flow,
+ evaluation, run, setup, study, and other resource APIs. The API
+ versions are read from ``openml.config.api_version`` and
+ ``openml.config.fallback_api_version`` on every access.
+
+ Notes
+ -----
+ One ``APIBackendBuilder`` is created lazily per combination of
+ primary and fallback API version and cached at class level, so
+ changing the configured versions switches the active backend.
+
+ Attributes
+ ----------
+ dataset : DatasetAPI
+ Interface for dataset-related API operations.
+ task : TaskAPI
+ Interface for task-related API operations.
+ evaluation_measure : EvaluationMeasureAPI
+ Interface for evaluation measure-related API operations.
+ estimation_procedure : EstimationProcedureAPI
+ Interface for estimation procedure-related API operations.
+ evaluation : EvaluationAPI
+ Interface for evaluation-related API operations.
+ flow : FlowAPI
+ Interface for flow-related API operations.
+ study : StudyAPI
+ Interface for study-related API operations.
+ run : RunAPI
+ Interface for run-related API operations.
+ setup : SetupAPI
+ Interface for setup-related API operations.
+ """
+
+ _instance: ClassVar[APIBackend | None] = None
+ _backends: ClassVar[dict[str, APIBackendBuilder]] = {}
+
+ @property
+ def _backend(self) -> APIBackendBuilder:
+ api_version = openml.config.api_version
+ fallback_api_version = openml.config.fallback_api_version
+ key = f"{api_version}_{fallback_api_version}"
+
+ if key not in self._backends:
+ _backend = APIBackendBuilder(
+ api_version=api_version,
+ fallback_api_version=fallback_api_version,
+ )
+ self._backends[key] = _backend
+
+ return self._backends[key]
+
+ @property
+ def dataset(self) -> DatasetAPI:
+ return cast("DatasetAPI", self._backend.dataset)
+
+ @property
+ def task(self) -> TaskAPI:
+ return cast("TaskAPI", self._backend.task)
+
+ @property
+ def evaluation_measure(self) -> EvaluationMeasureAPI:
+ return cast("EvaluationMeasureAPI", self._backend.evaluation_measure)
+
+ @property
+ def estimation_procedure(self) -> EstimationProcedureAPI:
+ return cast("EstimationProcedureAPI", self._backend.estimation_procedure)
+
+ @property
+ def evaluation(self) -> EvaluationAPI:
+ return cast("EvaluationAPI", self._backend.evaluation)
+
+ @property
+ def flow(self) -> FlowAPI:
+ return cast("FlowAPI", self._backend.flow)
+
+ @property
+ def study(self) -> StudyAPI:
+ return cast("StudyAPI", self._backend.study)
+
+ @property
+ def run(self) -> RunAPI:
+ return cast("RunAPI", self._backend.run)
+
+ @property
+ def setup(self) -> SetupAPI:
+ return cast("SetupAPI", self._backend.setup)
+
+ @property
+ def http_client(self) -> HTTPClient:
+ return cast("HTTPClient", self._backend.http_client)
+
+ @property
+ def fallback_http_client(self) -> HTTPClient | None:
+ return cast("HTTPClient | None", self._backend.fallback_http_client)
+
+ @property
+ def minio_client(self) -> MinIOClient:
+ return cast("MinIOClient", self._backend.minio_client)
+
+ @classmethod
+ def get_instance(cls) -> APIBackend:
+ """
+ Get the singleton instance of the APIBackend.
+
+ Returns
+ -------
+ APIBackend
+ Singleton instance of the backend.
+ """
+ if cls._instance is None:
+ cls._instance = cls()
+ return cls._instance
diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py
new file mode 100644
index 000000000..76d6e0970
--- /dev/null
+++ b/openml/_api/setup/builder.py
@@ -0,0 +1,138 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from openml._api.clients import HTTPClient, MinIOClient
+from openml._api.resources import (
+ API_REGISTRY,
+ FallbackProxy,
+)
+from openml.enums import ResourceType
+
+if TYPE_CHECKING:
+ from openml._api.resources import ResourceAPI
+ from openml.enums import APIVersion
+
+
+class APIBackendBuilder:
+ """
+ Builder for constructing API backend instances with all resource-specific APIs.
+
+ This class organizes resource-specific API objects (datasets, tasks,
+ flows, evaluations, runs, setups, studies, etc.) and provides a
+ centralized access point for both the primary API version and an
+ optional fallback API version.
+
+ The constructor automatically initializes:
+
+ - HTTPClient for the primary API version
+ - Optional HTTPClient for a fallback API version
+ - MinIOClient for file storage operations
+ - Resource-specific API instances, optionally wrapped with fallback proxies
+
+ Parameters
+ ----------
+ api_version : APIVersion
+ The primary API version to use for all resource APIs and HTTP communication.
+ fallback_api_version : APIVersion | None, default=None
+ Optional fallback API version to wrap resource APIs with a FallbackProxy.
+
+ Attributes
+ ----------
+ dataset : ResourceAPI | FallbackProxy
+ API interface for dataset resources.
+ task : ResourceAPI | FallbackProxy
+ API interface for task resources.
+ evaluation_measure : ResourceAPI | FallbackProxy
+ API interface for evaluation measure resources.
+ estimation_procedure : ResourceAPI | FallbackProxy
+ API interface for estimation procedure resources.
+ evaluation : ResourceAPI | FallbackProxy
+ API interface for evaluation resources.
+ flow : ResourceAPI | FallbackProxy
+ API interface for flow resources.
+ study : ResourceAPI | FallbackProxy
+ API interface for study resources.
+ run : ResourceAPI | FallbackProxy
+ API interface for run resources.
+ setup : ResourceAPI | FallbackProxy
+ API interface for setup resources.
+ http_client : HTTPClient
+ Client for HTTP communication using the primary API version.
+ fallback_http_client : HTTPClient | None
+ Client for HTTP communication using the fallback API version, if provided.
+ minio_client : MinIOClient
+ Client for file storage operations (MinIO/S3).
+ """
+
+ dataset: ResourceAPI | FallbackProxy
+ task: ResourceAPI | FallbackProxy
+ evaluation_measure: ResourceAPI | FallbackProxy
+ estimation_procedure: ResourceAPI | FallbackProxy
+ evaluation: ResourceAPI | FallbackProxy
+ flow: ResourceAPI | FallbackProxy
+ study: ResourceAPI | FallbackProxy
+ run: ResourceAPI | FallbackProxy
+ setup: ResourceAPI | FallbackProxy
+ http_client: HTTPClient
+ fallback_http_client: HTTPClient | None
+ minio_client: MinIOClient
+
+ def __init__(self, api_version: APIVersion, fallback_api_version: APIVersion | None = None):
+ # initialize clients and resource APIs in-place
+ self._build(api_version, fallback_api_version)
+
+ def _build(self, api_version: APIVersion, fallback_api_version: APIVersion | None) -> None:
+ """
+ Initialize the clients and resource API interfaces on this instance.
+
+ This method initializes HTTP and MinIO clients, creates resource-specific
+ API instances for the primary API version, and optionally wraps them
+ with fallback proxies if a fallback API version is provided.
+
+ Parameters
+ ----------
+ api_version : APIVersion
+ Primary API version used for the resource APIs and the HTTP client.
+ fallback_api_version : APIVersion or None
+ Fallback API version; if ``None``, no fallback proxies are created.
+
+ Returns
+ -------
+ None
+ """
+ minio_client = MinIOClient()
+ primary_http_client = HTTPClient(api_version=api_version)
+
+ self.http_client = primary_http_client
+ self.minio_client = minio_client
+ self.fallback_http_client = None
+
+ resource_apis: dict[ResourceType, ResourceAPI | FallbackProxy] = {}
+ for resource_type, resource_api_cls in API_REGISTRY[api_version].items():
+ resource_apis[resource_type] = resource_api_cls(primary_http_client, minio_client)
+
+ if fallback_api_version is not None:
+ fallback_http_client = HTTPClient(api_version=fallback_api_version)
+ self.fallback_http_client = fallback_http_client
+
+ fallback_resource_apis: dict[ResourceType, ResourceAPI | FallbackProxy] = {}
+ for resource_type, resource_api_cls in API_REGISTRY[fallback_api_version].items():
+ fallback_resource_apis[resource_type] = resource_api_cls(
+ fallback_http_client, minio_client
+ )
+
+ resource_apis = {
+ name: FallbackProxy(resource_apis[name], fallback_resource_apis[name])
+ for name in resource_apis
+ }
+
+ self.dataset = resource_apis[ResourceType.DATASET]
+ self.task = resource_apis[ResourceType.TASK]
+ self.evaluation_measure = resource_apis[ResourceType.EVALUATION_MEASURE]
+ self.estimation_procedure = resource_apis[ResourceType.ESTIMATION_PROCEDURE]
+ self.evaluation = resource_apis[ResourceType.EVALUATION]
+ self.flow = resource_apis[ResourceType.FLOW]
+ self.study = resource_apis[ResourceType.STUDY]
+ self.run = resource_apis[ResourceType.RUN]
+ self.setup = resource_apis[ResourceType.SETUP]
diff --git a/openml/_config.py b/openml/_config.py
index a7034b9b4..1abcee7c7 100644
--- a/openml/_config.py
+++ b/openml/_config.py
@@ -12,16 +12,70 @@
import warnings
from collections.abc import Iterator
from contextlib import contextmanager
+from copy import deepcopy
from dataclasses import dataclass, field, fields, replace
from io import StringIO
from pathlib import Path
from typing import Any, ClassVar, Literal, cast
from urllib.parse import urlparse
+from openml.enums import APIVersion
+
+from .__version__ import __version__
+
logger = logging.getLogger(__name__)
openml_logger = logging.getLogger("openml")
+_PROD_SERVERS: dict[APIVersion, dict[str, str | None]] = {
+ APIVersion.V1: {
+ "server": "https://www.openml.org/api/v1/xml/",
+ "apikey": None,
+ },
+ APIVersion.V2: {
+ "server": None,
+ "apikey": None,
+ },
+}
+
+_TEST_SERVERS: dict[APIVersion, dict[str, str | None]] = {
+ APIVersion.V1: {
+ "server": "https://test.openml.org/api/v1/xml/",
+ "apikey": "normaluser",
+ },
+ APIVersion.V2: {
+ "server": None,
+ "apikey": None,
+ },
+}
+
+_TEST_SERVERS_LOCAL: dict[APIVersion, dict[str, str | None]] = {
+ APIVersion.V1: {
+ "server": "http://localhost:8000/api/v1/xml/",
+ "apikey": "normaluser",
+ },
+ APIVersion.V2: {
+ "server": "http://localhost:8082/",
+ "apikey": "AD000000000000000000000000000000",
+ },
+}
+
+_SERVERS_REGISTRY: dict[str, dict[APIVersion, dict[str, str | None]]] = {
+ "production": _PROD_SERVERS,
+ "test": _TEST_SERVERS_LOCAL
+ if os.getenv("OPENML_USE_LOCAL_SERVICES") == "true"
+ else _TEST_SERVERS,
+}
+
+
+def _get_servers(mode: str) -> dict[APIVersion, dict[str, str | None]]:
+ if mode not in _SERVERS_REGISTRY:
+ raise ValueError(
+ f'invalid mode="{mode}" allowed modes: {", ".join(list(_SERVERS_REGISTRY.keys()))}'
+ )
+ return deepcopy(_SERVERS_REGISTRY[mode])
+
+
def _resolve_default_cache_dir() -> Path:
user_defined_cache_dir = os.environ.get("OPENML_CACHE_DIR")
if user_defined_cache_dir is not None:
@@ -57,19 +111,38 @@ def _resolve_default_cache_dir() -> Path:
class OpenMLConfig:
"""Dataclass storing the OpenML configuration."""
- apikey: str | None = ""
- server: str = "https://www.openml.org/api/v1/xml"
+ servers: dict[APIVersion, dict[str, str | None]] = field(
+ default_factory=lambda: _get_servers("production")
+ )
+ api_version: APIVersion = APIVersion.V1
+ fallback_api_version: APIVersion | None = None
cachedir: Path = field(default_factory=_resolve_default_cache_dir)
avoid_duplicate_runs: bool = False
retry_policy: Literal["human", "robot"] = "human"
connection_n_retries: int = 5
show_progress: bool = False
- def __setattr__(self, name: str, value: Any) -> None:
- if name == "apikey" and not isinstance(value, (type(None), str)):
- raise TypeError("apikey must be a string or None")
+ @property
+ def server(self) -> str:
+ server = self.servers[self.api_version]["server"]
+ if server is None:
+ servers_repr = {k.value: v for k, v in self.servers.items()}
+ raise ValueError(
+ f'server found to be None for api_version="{self.api_version}" in {servers_repr}'
+ )
+ return server
+
+ @server.setter
+ def server(self, value: str | None) -> None:
+ self.servers[self.api_version]["server"] = value
- super().__setattr__(name, value)
+ @property
+ def apikey(self) -> str | None:
+ return self.servers[self.api_version]["apikey"]
+
+ @apikey.setter
+ def apikey(self, value: str | None) -> None:
+ self.servers[self.api_version]["apikey"] = value
class OpenMLConfigManager:
@@ -81,9 +154,8 @@ def __init__(self) -> None:
self.OPENML_CACHE_DIR_ENV_VAR = "OPENML_CACHE_DIR"
self.OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET"
- self._TEST_SERVER_NORMAL_USER_KEY = "normaluser"
self.OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR = "OPENML_TEST_SERVER_ADMIN_KEY"
- self.TEST_SERVER_URL = "https://test.openml.org"
+ self._HEADERS: dict[str, str] = {"user-agent": f"openml-python/{__version__}"}
self._config: OpenMLConfig = OpenMLConfig()
# for legacy test `test_non_writable_home`
@@ -116,7 +188,7 @@ def __setattr__(self, name: str, value: Any) -> None:
"_examples",
"OPENML_CACHE_DIR_ENV_VAR",
"OPENML_SKIP_PARQUET_ENV_VAR",
- "_TEST_SERVER_NORMAL_USER_KEY",
+ "_HEADERS",
}:
return object.__setattr__(self, name, value)
@@ -127,6 +199,10 @@ def __setattr__(self, name: str, value: Any) -> None:
object.__setattr__(self, "_config", replace(self._config, **{name: value}))
return None
+ if name in ["server", "apikey"]:
+ setattr(self._config, name, value)
+ return None
+
object.__setattr__(self, name, value)
return None
@@ -190,6 +266,48 @@ def get_server_base_url(self) -> str:
domain, _ = self._config.server.split("/api", maxsplit=1)
return domain.replace("api", "www")
+ def _get_servers(self, mode: str) -> dict[APIVersion, dict[str, str | None]]:
+ return _get_servers(mode)
+
+ def _set_servers(self, mode: str) -> None:
+ servers = self._get_servers(mode)
+ self._config = replace(self._config, servers=servers)
+
+ def get_production_servers(self) -> dict[APIVersion, dict[str, str | None]]:
+ return self._get_servers(mode="production")
+
+ def get_test_servers(self) -> dict[APIVersion, dict[str, str | None]]:
+ return self._get_servers(mode="test")
+
+ def use_production_servers(self) -> None:
+ self._set_servers(mode="production")
+
+ def use_test_servers(self) -> None:
+ self._set_servers(mode="test")
+
+    def set_api_version(
+        self,
+        api_version: APIVersion,
+        fallback_api_version: APIVersion | None = None,
+    ) -> None:
+        if api_version not in tuple(APIVersion):  # tuple: Enum `in` raises TypeError for str on <3.12
+            raise ValueError(
+                f'invalid api_version="{api_version}" '
+                f"allowed versions: {', '.join(APIVersion)}"
+            )
+
+        if fallback_api_version is not None and fallback_api_version not in tuple(APIVersion):
+            raise ValueError(
+                f'invalid fallback_api_version="{fallback_api_version}" '
+                f"allowed versions: {', '.join(APIVersion)}"
+            )
+
+        self._config = replace(
+            self._config,
+            api_version=api_version,
+            fallback_api_version=fallback_api_version,
+        )
+
def set_retry_policy(
self, value: Literal["human", "robot"], n_retries: int | None = None
) -> None:
@@ -317,13 +435,18 @@ def _setup(self, config: dict[str, Any] | None = None) -> None:
self._config = replace(
self._config,
- apikey=config["apikey"],
- server=config["server"],
+ servers=config["servers"],
+ api_version=config["api_version"],
+ fallback_api_version=config["fallback_api_version"],
show_progress=config["show_progress"],
avoid_duplicate_runs=config["avoid_duplicate_runs"],
retry_policy=config["retry_policy"],
connection_n_retries=int(config["connection_n_retries"]),
)
+ if "server" in config:
+ self._config.server = config["server"]
+ if "apikey" in config:
+ self._config.apikey = config["apikey"]
user_defined_cache_dir = os.environ.get(self.OPENML_CACHE_DIR_ENV_VAR)
if user_defined_cache_dir is not None:
@@ -393,14 +516,12 @@ def overwrite_config_context(self, config: dict[str, Any]) -> Iterator[dict[str,
class ConfigurationForExamples:
"""Allows easy switching to and from a test configuration, used for examples."""
- _last_used_server = None
- _last_used_key = None
+ _last_used_servers = None
_start_last_called = False
def __init__(self, manager: OpenMLConfigManager):
self._manager = manager
- self._test_apikey = manager._TEST_SERVER_NORMAL_USER_KEY
- self._test_server = f"{manager.TEST_SERVER_URL}/api/v1/xml"
+ self._test_servers = manager.get_test_servers()
def start_using_configuration_for_example(self) -> None:
"""Sets the configuration to connect to the test server with valid apikey.
@@ -408,27 +529,22 @@ def start_using_configuration_for_example(self) -> None:
To configuration as was before this call is stored, and can be recovered
by using the `stop_use_example_configuration` method.
"""
- if (
- self._start_last_called
- and self._manager._config.server == self._test_server
- and self._manager._config.apikey == self._test_apikey
- ):
+ if self._start_last_called and self._manager._config.servers == self._test_servers:
# Method is called more than once in a row without modifying the server or apikey.
# We don't want to save the current test configuration as a last used configuration.
return
- self._last_used_server = self._manager._config.server
- self._last_used_key = self._manager._config.apikey
+ self._last_used_servers = self._manager._config.servers
type(self)._start_last_called = True
# Test server key for examples
self._manager._config = replace(
self._manager._config,
- server=self._test_server,
- apikey=self._test_apikey,
+ servers=self._test_servers,
)
+ test_server = self._test_servers[self._manager._config.api_version]["server"]
warnings.warn(
- f"Switching to the test server {self._test_server} to not upload results to "
+ f"Switching to the test server {test_server} to not upload results to "
"the live server. Using the test server may result in reduced performance of the "
"API!",
stacklevel=2,
@@ -446,8 +562,7 @@ def stop_using_configuration_for_example(self) -> None:
self._manager._config = replace(
self._manager._config,
- server=cast("str", self._last_used_server),
- apikey=cast("str", self._last_used_key),
+ servers=cast("dict[APIVersion, dict[str, str | None]]", self._last_used_servers),
)
type(self)._start_last_called = False
diff --git a/openml/cli.py b/openml/cli.py
index 838f774d1..1415d0af9 100644
--- a/openml/cli.py
+++ b/openml/cli.py
@@ -8,10 +8,12 @@
from collections.abc import Callable
from dataclasses import fields
from pathlib import Path
+from typing import cast
from urllib.parse import urlparse
import openml
from openml.__version__ import __version__
+from openml.enums import APIVersion
def is_hex(string_: str) -> bool:
@@ -110,9 +112,9 @@ def check_server(server: str) -> str:
def replace_shorthand(server: str) -> str:
if server == "test":
- return f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
+ return cast("str", openml.config.get_test_servers()[APIVersion.V1]["server"])
if server == "production_server":
- return "https://www.openml.org/api/v1/xml"
+ return cast("str", openml.config.get_production_servers()[APIVersion.V1]["server"])
return server
configure_field(
diff --git a/openml/enums.py b/openml/enums.py
new file mode 100644
index 000000000..f5a4381b7
--- /dev/null
+++ b/openml/enums.py
@@ -0,0 +1,33 @@
+from __future__ import annotations
+
+from enum import Enum
+
+
+class APIVersion(str, Enum):
+ """Supported OpenML API versions."""
+
+ V1 = "v1"
+ V2 = "v2"
+
+
+class ResourceType(str, Enum):
+ """Canonical resource types exposed by the OpenML API."""
+
+ DATASET = "dataset"
+ TASK = "task"
+ TASK_TYPE = "task_type"
+ EVALUATION_MEASURE = "evaluation_measure"
+ ESTIMATION_PROCEDURE = "estimation_procedure"
+ EVALUATION = "evaluation"
+ FLOW = "flow"
+ STUDY = "study"
+ RUN = "run"
+ SETUP = "setup"
+ USER = "user"
+
+
+class RetryPolicy(str, Enum):
+ """Retry behavior for failed API requests."""
+
+ HUMAN = "human"
+ ROBOT = "robot"
diff --git a/openml/exceptions.py b/openml/exceptions.py
index 1c1343ff3..e96ebfcb2 100644
--- a/openml/exceptions.py
+++ b/openml/exceptions.py
@@ -88,3 +88,7 @@ def __init__(self, message: str):
class ObjectNotPublishedError(PyOpenMLError):
"""Indicates an object has not been published yet."""
+
+
+class OpenMLNotSupportedError(PyOpenMLError):
+ """Raised when an API operation is not supported for a resource/version."""
diff --git a/openml/testing.py b/openml/testing.py
index 9f694f9bf..5151a5a62 100644
--- a/openml/testing.py
+++ b/openml/testing.py
@@ -47,9 +47,7 @@ class TestBase(unittest.TestCase):
"user": [],
}
flow_name_tracker: ClassVar[list[str]] = []
- test_server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
admin_key = os.environ.get(openml.config.OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR)
- user_key = openml.config._TEST_SERVER_NORMAL_USER_KEY
# creating logger for tracking files uploaded to test server
logger = logging.getLogger("unit_tests_published_entities")
@@ -99,8 +97,6 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None:
os.chdir(self.workdir)
self.cached = True
- openml.config.apikey = TestBase.user_key
- self.production_server = "https://www.openml.org/api/v1/xml"
openml.config.set_root_cache_directory(str(self.workdir))
# Increase the number of retries to avoid spurious server failures
@@ -114,8 +110,7 @@ def use_production_server(self) -> None:
Please use this sparingly - it is better to use the test server.
"""
- openml.config.server = self.production_server
- openml.config.apikey = ""
+ openml.config.use_production_servers()
def tearDown(self) -> None:
"""Tear down the test"""
diff --git a/tests/conftest.py b/tests/conftest.py
index 1967f1fad..35d40809d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -34,6 +34,8 @@
from pathlib import Path
import pytest
import openml_sklearn
+from openml._api import HTTPClient, MinIOClient
+from openml.enums import APIVersion
import openml
from openml.testing import TestBase
@@ -97,8 +99,7 @@ def delete_remote_files(tracker, flow_names) -> None:
:param tracker: Dict
:return: None
"""
- openml.config.server = TestBase.test_server
- openml.config.apikey = TestBase.user_key
+ openml.config.use_test_servers()
# reordering to delete sub flows at the end of flows
# sub-flows have shorter names, hence, sorting by descending order of flow name length
@@ -250,8 +251,23 @@ def test_files_directory() -> Path:
@pytest.fixture(scope="session")
-def test_api_key() -> str:
- return TestBase.user_key
+def test_server_v1() -> str:
+ return openml.config.get_test_servers()[APIVersion.V1]["server"]
+
+
+@pytest.fixture(scope="session")
+def test_apikey_v1() -> str:
+ return openml.config.get_test_servers()[APIVersion.V1]["apikey"]
+
+
+@pytest.fixture(scope="session")
+def test_server_v2() -> str:
+ return openml.config.get_test_servers()[APIVersion.V2]["server"]
+
+
+@pytest.fixture(scope="session")
+def test_apikey_v2() -> str:
+ return openml.config.get_test_servers()[APIVersion.V2]["apikey"]
@pytest.fixture(autouse=True, scope="function")
@@ -272,15 +288,14 @@ def as_robot() -> Iterator[None]:
@pytest.fixture(autouse=True)
def with_server(request):
- if os.getenv("OPENML_USE_LOCAL_SERVICES") == "true":
- openml.config.TEST_SERVER_URL = "http://localhost:8000"
+ openml.config.set_api_version(APIVersion.V1)
+
if "production_server" in request.keywords:
- openml.config.server = "https://www.openml.org/api/v1/xml"
- openml.config.apikey = None
+ openml.config.use_production_servers()
yield
return
- openml.config.server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
- openml.config.apikey = TestBase.user_key
+
+ openml.config.use_test_servers()
yield
@@ -315,4 +330,19 @@ def workdir(tmp_path):
original_cwd = Path.cwd()
os.chdir(tmp_path)
yield tmp_path
- os.chdir(original_cwd)
\ No newline at end of file
+ os.chdir(original_cwd)
+
+
+@pytest.fixture
+def http_client_v1() -> HTTPClient:
+ return HTTPClient(api_version=APIVersion.V1)
+
+
+@pytest.fixture
+def http_client_v2() -> HTTPClient:
+ return HTTPClient(api_version=APIVersion.V2)
+
+
+@pytest.fixture
+def minio_client() -> MinIOClient:
+ return MinIOClient()
diff --git a/tests/test_api/__init__.py b/tests/test_api/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py
new file mode 100644
index 000000000..9783777f7
--- /dev/null
+++ b/tests/test_api/test_http.py
@@ -0,0 +1,259 @@
+from requests import Response, Request, Session
+from unittest.mock import patch
+import pytest
+import os
+import hashlib
+from pathlib import Path
+from urllib.parse import urljoin, urlparse
+from openml.enums import APIVersion
+from openml.exceptions import OpenMLAuthenticationError
+from openml._api import HTTPClient, HTTPCache
+import openml
+
+
+@pytest.fixture
+def cache(http_client_v1) -> HTTPCache:
+ return http_client_v1.cache
+
+
+@pytest.fixture
+def http_client(http_client_v1) -> HTTPClient:
+ return http_client_v1
+
+
+@pytest.fixture
+def sample_path() -> str:
+ return "task/1"
+
+
+@pytest.fixture
+def sample_url_v1(sample_path, test_server_v1) -> str:
+ return urljoin(test_server_v1, sample_path)
+
+
+@pytest.fixture
+def sample_download_url_v1(test_server_v1) -> str:
+ server = test_server_v1.split("api/")[0]
+ endpoint = "data/v1/download/1/anneal.arff"
+ url = server + endpoint
+ return url
+
+
+def test_cache(cache, sample_url_v1):
+ params = {"param1": "value1", "param2": "value2"}
+
+ parsed_url = urlparse(sample_url_v1)
+ netloc_parts = parsed_url.netloc.split(".")[::-1]
+ path_parts = parsed_url.path.strip("/").split("/")
+ params_key = "&".join([f"{k}={v}" for k, v in params.items()])
+
+
+ key = cache.get_key(sample_url_v1, params)
+
+ expected_key = os.path.join(
+ *netloc_parts,
+ *path_parts,
+ params_key,
+ )
+
+ assert key == expected_key
+
+ # mock response
+ req = Request("GET", sample_url_v1).prepare()
+ response = Response()
+ response.status_code = 200
+ response.url = sample_url_v1
+ response.reason = "OK"
+ response._content = b"test"
+ response.headers = {"Content-Type": "text/xml"}
+ response.encoding = "utf-8"
+ response.request = req
+ response.elapsed = type("Elapsed", (), {"total_seconds": lambda x: 0.1})()
+
+ cache.save(key, response)
+ cached = cache.load(key)
+
+ assert cached.status_code == 200
+ assert cached.url == sample_url_v1
+ assert cached.content == b"test"
+ assert cached.headers["Content-Type"] == "text/xml"
+
+
+@pytest.mark.test_server()
+def test_get(http_client):
+ response = http_client.get("task/1")
+
+ assert response.status_code == 200
+ assert b" DummyTaskV1API:
+ return DummyTaskV1API(http=http_client_v1, minio=minio_client)
+
+
+@pytest.fixture
+def dummy_task_v2(http_client_v2, minio_client) -> DummyTaskV2API:
+    return DummyTaskV2API(http=http_client_v2, minio=minio_client)
+
+
+@pytest.fixture
+def dummy_task_fallback(dummy_task_v1, dummy_task_v2) -> FallbackProxy:
+    return FallbackProxy(dummy_task_v2, dummy_task_v1)
+
+
+def test_v1_publish(dummy_task_v1, test_server_v1, test_apikey_v1):
+ resource = dummy_task_v1
+ resource_name = resource.resource_type.value
+ resource_files = {"description": "Resource Description File"}
+ resource_id = 123
+
+ with patch.object(Session, "request") as mock_request:
+ mock_request.return_value = Response()
+ mock_request.return_value.status_code = 200
+ mock_request.return_value._content = (
+ f'\n'
+ f"\t{resource_id}\n"
+ f"\n"
+ ).encode("utf-8")
+
+ published_resource_id = resource.publish(
+ resource_name,
+ files=resource_files,
+ )
+
+ assert resource_id == published_resource_id
+
+ mock_request.assert_called_once_with(
+ method="POST",
+ url=test_server_v1 + resource_name,
+ params={},
+ data={"api_key": test_apikey_v1},
+ headers=openml.config._HEADERS,
+ files=resource_files,
+ )
+
+
+def test_v1_delete(dummy_task_v1, test_server_v1, test_apikey_v1):
+ resource = dummy_task_v1
+ resource_name = resource.resource_type.value
+ resource_id = 123
+
+ with patch.object(Session, "request") as mock_request:
+ mock_request.return_value = Response()
+ mock_request.return_value.status_code = 200
+ mock_request.return_value._content = (
+ f'\n'
+ f" {resource_id}\n"
+ f"\n"
+ ).encode("utf-8")
+
+ resource.delete(resource_id)
+
+ mock_request.assert_called_once_with(
+ method="DELETE",
+ url=(
+ test_server_v1
+ + resource_name
+ + "/"
+ + str(resource_id)
+ ),
+ params={"api_key": test_apikey_v1},
+ data={},
+ headers=openml.config._HEADERS,
+ files=None,
+ )
+
+
+def test_v1_tag(dummy_task_v1, test_server_v1, test_apikey_v1):
+ resource = dummy_task_v1
+ resource_id = 123
+ resource_tag = "TAG"
+
+ with patch.object(Session, "request") as mock_request:
+ mock_request.return_value = Response()
+ mock_request.return_value.status_code = 200
+ mock_request.return_value._content = (
+ f''
+ f"{resource_id}"
+ f"{resource_tag}"
+ f""
+ ).encode("utf-8")
+
+ tags = resource.tag(resource_id, resource_tag)
+
+ assert resource_tag in tags
+
+ mock_request.assert_called_once_with(
+ method="POST",
+ url=(
+ test_server_v1
+ + resource.resource_type
+ + "/tag"
+ ),
+ params={},
+ data={
+ "api_key": test_apikey_v1,
+ "task_id": resource_id,
+ "tag": resource_tag,
+ },
+ headers=openml.config._HEADERS,
+ files=None,
+ )
+
+
+def test_v1_untag(dummy_task_v1, test_server_v1, test_apikey_v1):
+ resource = dummy_task_v1
+ resource_id = 123
+ resource_tag = "TAG"
+
+ with patch.object(Session, "request") as mock_request:
+ mock_request.return_value = Response()
+ mock_request.return_value.status_code = 200
+ mock_request.return_value._content = (
+ f''
+ f"{resource_id}"
+ f""
+ ).encode("utf-8")
+
+ tags = resource.untag(resource_id, resource_tag)
+
+ assert resource_tag not in tags
+
+ mock_request.assert_called_once_with(
+ method="POST",
+ url=(
+ test_server_v1
+ + resource.resource_type
+ + "/untag"
+ ),
+ params={},
+ data={
+ "api_key": test_apikey_v1,
+ "task_id": resource_id,
+ "tag": resource_tag,
+ },
+ headers=openml.config._HEADERS,
+ files=None,
+ )
+
+
+def test_v2_publish(dummy_task_v2):
+ with pytest.raises(OpenMLNotSupportedError):
+ dummy_task_v2.publish(path=None, files=None)
+
+
+def test_v2_delete(dummy_task_v2):
+ with pytest.raises(OpenMLNotSupportedError):
+ dummy_task_v2.delete(resource_id=None)
+
+
+def test_v2_tag(dummy_task_v2):
+ with pytest.raises(OpenMLNotSupportedError):
+ dummy_task_v2.tag(resource_id=None, tag=None)
+
+
+def test_v2_untag(dummy_task_v2):
+ with pytest.raises(OpenMLNotSupportedError):
+ dummy_task_v2.untag(resource_id=None, tag=None)
+
+
+def test_fallback_publish(dummy_task_fallback):
+ with patch.object(ResourceV1API, "publish") as mock_publish:
+ mock_publish.return_value = None
+ dummy_task_fallback.publish(path=None, files=None)
+ mock_publish.assert_called_once_with(path=None, files=None)
+
+
+def test_fallback_delete(dummy_task_fallback):
+ with patch.object(ResourceV1API, "delete") as mock_delete:
+ mock_delete.return_value = None
+ dummy_task_fallback.delete(resource_id=None)
+ mock_delete.assert_called_once_with(resource_id=None)
+
+
+def test_fallback_tag(dummy_task_fallback):
+ with patch.object(ResourceV1API, "tag") as mock_tag:
+ mock_tag.return_value = None
+ dummy_task_fallback.tag(resource_id=None, tag=None)
+ mock_tag.assert_called_once_with(resource_id=None, tag=None)
+
+
+def test_fallback_untag(dummy_task_fallback):
+ with patch.object(ResourceV1API, "untag") as mock_untag:
+ mock_untag.return_value = None
+ dummy_task_fallback.untag(resource_id=None, tag=None)
+ mock_untag.assert_called_once_with(resource_id=None, tag=None)
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index 974fb36ef..80b0b4215 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -157,7 +157,6 @@ def test_check_datasets_active(self):
openml.datasets.check_datasets_active,
[79],
)
- openml.config.server = self.test_server
@pytest.mark.test_server()
def test_illegal_character_tag(self):
@@ -185,7 +184,6 @@ def test__name_to_id_with_deactivated(self):
self.use_production_server()
# /d/1 was deactivated
assert openml.datasets.functions._name_to_id("anneal") == 2
- openml.config.server = self.test_server
@pytest.mark.production_server()
def test__name_to_id_with_multiple_active(self):
@@ -1552,7 +1550,6 @@ def test_list_datasets_with_high_size_parameter(self):
datasets_b = openml.datasets.list_datasets(size=np.inf)
# Reverting to test server
- openml.config.server = self.test_server
assert len(datasets_a) == len(datasets_b)
@@ -1727,7 +1724,7 @@ def test_delete_dataset(self):
@mock.patch.object(requests.Session, "delete")
-def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_key):
+def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_server_v1, test_apikey_v1):
content_file = (
test_files_directory / "mock_responses" / "datasets" / "data_delete_not_owned.xml"
)
@@ -1742,13 +1739,13 @@ def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_ke
):
openml.datasets.delete_dataset(40_000)
- dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
+ dataset_url = test_server_v1 + "data/40000"
assert dataset_url == mock_delete.call_args.args[0]
- assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
+ assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
-def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key):
+def test_delete_dataset_with_run(mock_delete, test_files_directory, test_server_v1, test_apikey_v1):
content_file = (
test_files_directory / "mock_responses" / "datasets" / "data_delete_has_tasks.xml"
)
@@ -1763,13 +1760,13 @@ def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key
):
openml.datasets.delete_dataset(40_000)
- dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
+ dataset_url = test_server_v1 + "data/40000"
assert dataset_url == mock_delete.call_args.args[0]
- assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
+ assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
-def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key):
+def test_delete_dataset_success(mock_delete, test_files_directory, test_server_v1, test_apikey_v1):
content_file = (
test_files_directory / "mock_responses" / "datasets" / "data_delete_successful.xml"
)
@@ -1781,13 +1778,13 @@ def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key)
success = openml.datasets.delete_dataset(40000)
assert success
- dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
+ dataset_url = test_server_v1 + "data/40000"
assert dataset_url == mock_delete.call_args.args[0]
- assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
+ assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
-def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key):
+def test_delete_unknown_dataset(mock_delete, test_files_directory, test_server_v1, test_apikey_v1):
content_file = (
test_files_directory / "mock_responses" / "datasets" / "data_delete_not_exist.xml"
)
@@ -1802,9 +1799,9 @@ def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key)
):
openml.datasets.delete_dataset(9_999_999)
- dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/9999999"
+ dataset_url = test_server_v1 + "data/9999999"
assert dataset_url == mock_delete.call_args.args[0]
- assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
+ assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
def _assert_datasets_have_id_and_valid_status(datasets: pd.DataFrame):
@@ -1996,14 +1993,14 @@ def test_read_features_from_xml_with_whitespace() -> None:
@pytest.mark.test_server()
-def test_get_dataset_parquet(requests_mock, test_files_directory):
+def test_get_dataset_parquet(requests_mock, test_files_directory, test_server_v1):
# Parquet functionality is disabled on the test server
# There is no parquet-copy of the test server yet.
content_file = (
test_files_directory / "mock_responses" / "datasets" / "data_description_61.xml"
)
# While the mocked example is from production, unit tests by default connect to the test server.
- requests_mock.get(f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/61", text=content_file.read_text())
+ requests_mock.get(test_server_v1 + "data/61", text=content_file.read_text())
dataset = openml.datasets.get_dataset(61, download_data=True)
assert dataset._parquet_url is not None
assert dataset.parquet_file is not None
diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py
index 14bb78060..7a1331c45 100644
--- a/tests/test_flows/test_flow_functions.py
+++ b/tests/test_flows/test_flow_functions.py
@@ -453,7 +453,7 @@ def test_delete_flow(self):
@mock.patch.object(requests.Session, "delete")
-def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key):
+def test_delete_flow_not_owned(mock_delete, test_files_directory, test_server_v1, test_apikey_v1):
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_owned.xml"
mock_delete.return_value = create_request_response(
status_code=412,
@@ -466,13 +466,13 @@ def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key):
):
openml.flows.delete_flow(40_000)
- flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
+ flow_url = test_server_v1 + "flow/40000"
assert flow_url == mock_delete.call_args.args[0]
- assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
+ assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
-def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key):
+def test_delete_flow_with_run(mock_delete, test_files_directory, test_server_v1, test_apikey_v1):
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_has_runs.xml"
mock_delete.return_value = create_request_response(
status_code=412,
@@ -485,13 +485,13 @@ def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key):
):
openml.flows.delete_flow(40_000)
- flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
+ flow_url = test_server_v1 + "flow/40000"
assert flow_url == mock_delete.call_args.args[0]
- assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
+ assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
-def test_delete_subflow(mock_delete, test_files_directory, test_api_key):
+def test_delete_subflow(mock_delete, test_files_directory, test_server_v1, test_apikey_v1):
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_is_subflow.xml"
mock_delete.return_value = create_request_response(
status_code=412,
@@ -504,13 +504,13 @@ def test_delete_subflow(mock_delete, test_files_directory, test_api_key):
):
openml.flows.delete_flow(40_000)
- flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
+ flow_url = test_server_v1 + "flow/40000"
assert flow_url == mock_delete.call_args.args[0]
- assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
+ assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
-def test_delete_flow_success(mock_delete, test_files_directory, test_api_key):
+def test_delete_flow_success(mock_delete, test_files_directory, test_server_v1, test_apikey_v1):
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_successful.xml"
mock_delete.return_value = create_request_response(
status_code=200,
@@ -520,14 +520,14 @@ def test_delete_flow_success(mock_delete, test_files_directory, test_api_key):
success = openml.flows.delete_flow(33364)
assert success
- flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/33364"
+ flow_url = test_server_v1 + "flow/33364"
assert flow_url == mock_delete.call_args.args[0]
- assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
+ assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
-def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key):
+def test_delete_unknown_flow(mock_delete, test_files_directory, test_server_v1, test_apikey_v1):
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_exist.xml"
mock_delete.return_value = create_request_response(
status_code=412,
@@ -540,6 +540,6 @@ def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key):
):
openml.flows.delete_flow(9_999_999)
- flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/9999999"
+ flow_url = test_server_v1 + "flow/9999999"
assert flow_url == mock_delete.call_args.args[0]
- assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
+ assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py
index f3feca784..f50aeadaa 100644
--- a/tests/test_openml/test_config.py
+++ b/tests/test_openml/test_config.py
@@ -9,12 +9,14 @@
from typing import Any, Iterator
from pathlib import Path
import platform
+from urllib.parse import urlparse
import pytest
import openml
import openml.testing
from openml.testing import TestBase
+from openml.enums import APIVersion
@contextmanager
@@ -77,22 +79,24 @@ def test_get_config_as_dict(self):
"""Checks if the current configuration is returned accurately as a dict."""
config = openml.config.get_config_as_dict()
_config = {}
- _config["apikey"] = TestBase.user_key
- _config["server"] = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
+ _config["api_version"] = APIVersion.V1
+ _config["fallback_api_version"] = None
+ _config["servers"] = openml.config.get_test_servers()
_config["cachedir"] = self.workdir
_config["avoid_duplicate_runs"] = False
_config["connection_n_retries"] = 20
_config["retry_policy"] = "robot"
_config["show_progress"] = False
assert isinstance(config, dict)
- assert len(config) == 7
+ assert len(config) == 8
self.assertDictEqual(config, _config)
def test_setup_with_config(self):
"""Checks if the OpenML configuration can be updated using _setup()."""
_config = {}
- _config["apikey"] = TestBase.user_key
- _config["server"] = "https://www.openml.org/api/v1/xml"
+ _config["api_version"] = APIVersion.V1
+ _config["fallback_api_version"] = None
+ _config["servers"] = openml.config.get_test_servers()
_config["cachedir"] = self.workdir
_config["avoid_duplicate_runs"] = True
_config["retry_policy"] = "human"
@@ -109,26 +113,22 @@ class TestConfigurationForExamples(openml.testing.TestBase):
@pytest.mark.production_server()
def test_switch_to_example_configuration(self):
"""Verifies the test configuration is loaded properly."""
- # Below is the default test key which would be used anyway, but just for clarity:
- openml.config.apikey = "any-api-key"
- openml.config.server = self.production_server
+ openml.config.use_production_servers()
openml.config.start_using_configuration_for_example()
- assert openml.config.apikey == TestBase.user_key
- assert openml.config.server == self.test_server
+ assert openml.config.servers == openml.config.get_test_servers()
@pytest.mark.production_server()
def test_switch_from_example_configuration(self):
"""Verifies the previous configuration is loaded after stopping."""
# Below is the default test key which would be used anyway, but just for clarity:
- openml.config.apikey = TestBase.user_key
- openml.config.server = self.production_server
+ openml.config.use_production_servers()
openml.config.start_using_configuration_for_example()
openml.config.stop_using_configuration_for_example()
- assert openml.config.apikey == TestBase.user_key
- assert openml.config.server == self.production_server
+
+ assert openml.config.servers == openml.config.get_production_servers()
def test_example_configuration_stop_before_start(self):
"""Verifies an error is raised if `stop_...` is called before `start_...`."""
@@ -145,15 +145,13 @@ def test_example_configuration_stop_before_start(self):
@pytest.mark.production_server()
def test_example_configuration_start_twice(self):
"""Checks that the original config can be returned to if `start..` is called twice."""
- openml.config.apikey = TestBase.user_key
- openml.config.server = self.production_server
+ openml.config.use_production_servers()
openml.config.start_using_configuration_for_example()
openml.config.start_using_configuration_for_example()
openml.config.stop_using_configuration_for_example()
- assert openml.config.apikey == TestBase.user_key
- assert openml.config.server == self.production_server
+ assert openml.config.servers == openml.config.get_production_servers()
def test_configuration_file_not_overwritten_on_load():
@@ -190,5 +188,71 @@ def test_openml_cache_dir_env_var(tmp_path: Path) -> None:
with safe_environ_patcher("OPENML_CACHE_DIR", str(expected_path)):
openml.config._setup()
+
assert openml.config._root_cache_directory == expected_path
assert openml.config.get_cache_directory() == str(expected_path / "org" / "openml" / "www")
+
+
+@pytest.mark.parametrize("mode", ["production", "test"])
+@pytest.mark.parametrize("api_version", [APIVersion.V1, APIVersion.V2])
+def test_get_servers(mode, api_version):
+    """Check that ``_get_servers`` ignores later config edits and edits to its result."""
+    orig_servers = openml.config._get_servers(mode)
+    openml.config._set_servers(mode)
+    openml.config.set_api_version(api_version)
+    openml.config.server = "temp-server1"
+    openml.config.apikey = "temp-apikey1"
+    openml.config._get_servers(mode)["server"] = "temp-server2"
+    openml.config._get_servers(mode)["apikey"] = "temp-apikey2"
+    # NOTE(review): vacuous if _get_servers returns one shared object; deepcopy the snapshot?
+    assert openml.config._get_servers(mode) == orig_servers
+
+
+@pytest.mark.parametrize("mode", ["production", "test"])
+@pytest.mark.parametrize("api_version", [APIVersion.V1, APIVersion.V2])
+def test_set_servers(mode, api_version):
+    """``_set_servers`` installs the mode's servers; overrides touch only the active version."""
+    openml.config._set_servers(mode)
+    openml.config.set_api_version(api_version)
+
+    assert openml.config.servers == openml.config._get_servers(mode)
+    assert openml.config.api_version == api_version
+
+    openml.config.server = "temp-server"
+    openml.config.apikey = "temp-apikey"
+    active = openml.config.servers[api_version]
+    assert openml.config.server == active["server"]
+    assert openml.config.apikey == active["apikey"]
+
+    # Exactly the active version's entry must differ from this mode's defaults.
+    for version, entry in openml.config.servers.items():
+        differs = entry != openml.config._get_servers(mode)[version]
+        assert differs == (version == api_version)
+
+
+def test_get_production_servers():  # public getter vs. private lookup for "production"
+    assert openml.config._get_servers("production") == openml.config.get_production_servers()
+
+
+def test_get_test_servers():  # public getter vs. private lookup for "test"
+    assert openml.config._get_servers("test") == openml.config.get_test_servers()
+
+
+def test_use_production_servers():
+    """``use_production_servers()`` must match ``_set_servers("production")``."""
+    openml.config.use_production_servers()
+    via_public = openml.config.servers
+
+    openml.config._set_servers("production")
+    via_private = openml.config.servers
+    assert via_public == via_private
+
+
+def test_use_test_servers():
+    """``use_test_servers()`` must match ``_set_servers("test")``."""
+    openml.config.use_test_servers()
+    via_public = openml.config.servers
+
+    openml.config._set_servers("test")
+    via_private = openml.config.servers
+    assert via_public == via_private
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 8d5a00f9b..3728e0d78 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -1813,7 +1813,7 @@ def test_initialize_model_from_run_nonstrict(self):
@mock.patch.object(requests.Session, "delete")
-def test_delete_run_not_owned(mock_delete, test_files_directory, test_api_key):
+def test_delete_run_not_owned(mock_delete, test_files_directory, test_server_v1, test_apikey_v1):
content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_not_owned.xml"
mock_delete.return_value = create_request_response(
status_code=412,
@@ -1826,13 +1826,13 @@ def test_delete_run_not_owned(mock_delete, test_files_directory, test_api_key):
):
openml.runs.delete_run(40_000)
- run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/40000"
+ run_url = test_server_v1 + "run/40000"
assert run_url == mock_delete.call_args.args[0]
- assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
+ assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
-def test_delete_run_success(mock_delete, test_files_directory, test_api_key):
+def test_delete_run_success(mock_delete, test_files_directory, test_server_v1, test_apikey_v1):
content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_successful.xml"
mock_delete.return_value = create_request_response(
status_code=200,
@@ -1842,13 +1842,13 @@ def test_delete_run_success(mock_delete, test_files_directory, test_api_key):
success = openml.runs.delete_run(10591880)
assert success
- run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/10591880"
+ run_url = test_server_v1 + "run/10591880"
assert run_url == mock_delete.call_args.args[0]
- assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
+ assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
-def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):
+def test_delete_unknown_run(mock_delete, test_files_directory, test_server_v1, test_apikey_v1):
content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_not_exist.xml"
mock_delete.return_value = create_request_response(
status_code=412,
@@ -1861,9 +1861,9 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):
):
openml.runs.delete_run(9_999_999)
- run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/9999999"
+ run_url = test_server_v1 + "run/9999999"
assert run_url == mock_delete.call_args.args[0]
- assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
+ assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@pytest.mark.sklearn()
diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py
index df3c0a3b6..bf2fcfeae 100644
--- a/tests/test_tasks/test_task_functions.py
+++ b/tests/test_tasks/test_task_functions.py
@@ -245,7 +245,7 @@ def test_deletion_of_cache_dir(self):
@mock.patch.object(requests.Session, "delete")
-def test_delete_task_not_owned(mock_delete, test_files_directory, test_api_key):
+def test_delete_task_not_owned(mock_delete, test_files_directory, test_server_v1, test_apikey_v1):
content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_not_owned.xml"
mock_delete.return_value = create_request_response(
status_code=412,
@@ -258,13 +258,13 @@ def test_delete_task_not_owned(mock_delete, test_files_directory, test_api_key):
):
openml.tasks.delete_task(1)
- task_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/task/1"
+ task_url = test_server_v1 + "task/1"
assert task_url == mock_delete.call_args.args[0]
- assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
+ assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
-def test_delete_task_with_run(mock_delete, test_files_directory, test_api_key):
+def test_delete_task_with_run(mock_delete, test_files_directory, test_server_v1, test_apikey_v1):
content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_has_runs.xml"
mock_delete.return_value = create_request_response(
status_code=412,
@@ -277,13 +277,13 @@ def test_delete_task_with_run(mock_delete, test_files_directory, test_api_key):
):
openml.tasks.delete_task(3496)
- task_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/task/3496"
+ task_url = test_server_v1 + "task/3496"
assert task_url == mock_delete.call_args.args[0]
- assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
+ assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
-def test_delete_success(mock_delete, test_files_directory, test_api_key):
+def test_delete_success(mock_delete, test_files_directory, test_server_v1, test_apikey_v1):
content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_successful.xml"
mock_delete.return_value = create_request_response(
status_code=200,
@@ -293,13 +293,13 @@ def test_delete_success(mock_delete, test_files_directory, test_api_key):
success = openml.tasks.delete_task(361323)
assert success
- task_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/task/361323"
+ task_url = test_server_v1 + "task/361323"
assert task_url == mock_delete.call_args.args[0]
- assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
+ assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
@mock.patch.object(requests.Session, "delete")
-def test_delete_unknown_task(mock_delete, test_files_directory, test_api_key):
+def test_delete_unknown_task(mock_delete, test_files_directory, test_server_v1, test_apikey_v1):
content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_not_exist.xml"
mock_delete.return_value = create_request_response(
status_code=412,
@@ -312,6 +312,6 @@ def test_delete_unknown_task(mock_delete, test_files_directory, test_api_key):
):
openml.tasks.delete_task(9_999_999)
- task_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/task/9999999"
+ task_url = test_server_v1 + "task/9999999"
assert task_url == mock_delete.call_args.args[0]
- assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
+ assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py
index 75f24ebf0..111ff778c 100644
--- a/tests/test_utils/test_utils.py
+++ b/tests/test_utils/test_utils.py
@@ -44,7 +44,7 @@ def min_number_evaluations_on_test_server() -> int:
def _mocked_perform_api_call(call, request_method):
- url = openml.config.server + "/" + call
+ url = openml.config.server + call
return openml._api_calls._download_text_file(url)