Skip to content

rg.Argilla

To interact with the Argilla server from Python you can use the Argilla class. The Argilla client is used to create, get, update, and delete all Argilla resources, such as workspaces, users, datasets, and records.

Usage Examples

Deploying Argilla Server on Hugging Face Spaces

To deploy Argilla on Hugging Face Spaces, use the deploy_on_spaces method.

import argilla as rg

# Deploy Argilla on Hugging Face Spaces and get back a client connected to it.
# The API key must be at least 8 characters long, otherwise a ValueError is raised.
client = rg.Argilla.deploy_on_spaces(api_key="12345678")

Connecting to an Argilla server

To connect to an Argilla server, instantiate the Argilla class and pass the api_url of the server and the api_key to authenticate.

import argilla as rg

# Connect to an existing Argilla server. When omitted, `api_url` and `api_key`
# fall back to the ARGILLA_API_URL / ARGILLA_API_KEY environment variables.
client = rg.Argilla(
    api_url="https://argilla.example.com",
    api_key="my_api_key",
)

Accessing Dataset, Workspace, and User objects

The Argilla client provides access to the Dataset, Workspace, and User objects of the Argilla server.

# Each collection exposed by the client is called with the name of the resource to look up.
my_dataset = client.datasets("my_dataset")

my_workspace = client.workspaces("my_workspace")

my_user = client.users("my_user")

These resources can then be interacted with to access their properties and methods. For example, to list all datasets in a workspace:

# Walk the datasets of the workspace and print the name of each one.
for dataset in my_workspace.datasets:
    print(dataset.name)

Argilla

Bases: APIClient, SpacesDeploymentMixin, NotebookHTMLReprMixin

Argilla API client. This is the main entry point to interact with the API.

Attributes:

Name Type Description
workspaces Workspaces

A collection of workspaces.

datasets Datasets

A collection of datasets.

users Users

A collection of users.

me User

The current user.

Source code in src/argilla/client.py
class Argilla(_api.APIClient, SpacesDeploymentMixin, NotebookHTMLReprMixin):
    """Client for the Argilla API and main entry point to interact with a server.

    Every client that is constructed registers itself as the default client
    of the class, so the most recently created instance is the one returned
    by `_get_default`.

    Attributes:
        workspaces: A collection of workspaces.
        datasets: A collection of datasets.
        users: A collection of users.
        me: The current user.
    """

    # Default client instance; overwritten each time a client is constructed.
    _default_client: Optional["Argilla"] = None

    def __init__(
        self,
        api_url: Optional[str] = DEFAULT_HTTP_CONFIG.api_url,
        api_key: Optional[str] = DEFAULT_HTTP_CONFIG.api_key,
        timeout: int = DEFAULT_HTTP_CONFIG.timeout,
        retries: int = DEFAULT_HTTP_CONFIG.retries,
        **http_client_args,
    ) -> None:
        """Create the client and register it as the default `Argilla` instance.

        Args:
            api_url: the URL of the Argilla API. If not provided, then the value will try
                to be set from `ARGILLA_API_URL` environment variable. Defaults to
                `"http://localhost:6900"`.
            api_key: the key to be used to authenticate in the Argilla API. If not provided,
                then the value will try to be set from `ARGILLA_API_KEY` environment variable.
                Defaults to `None`.
            timeout: the maximum time in seconds to wait for a request to the Argilla API
                to be completed before raising an exception. Defaults to `60`.
            retries: the number of times to retry the HTTP connection to the Argilla API
                before raising an exception. Defaults to `5`.
            **http_client_args: extra keyword arguments forwarded unchanged to the
                parent `APIClient` initializer (for example `headers`).
        """
        super().__init__(
            api_url=api_url,
            api_key=api_key,
            timeout=timeout,
            retries=retries,
            **http_client_args,
        )

        # The newest client always becomes the default one.
        self._set_default(self)

    @property
    def workspaces(self) -> "Workspaces":
        """A collection of workspaces on the server."""
        # A new collection object bound to this client is built on every access.
        return Workspaces(client=self)

    @property
    def datasets(self) -> "Datasets":
        """A collection of datasets on the server."""
        return Datasets(client=self)

    @property
    def users(self) -> "Users":
        """A collection of users on the server."""
        return Users(client=self)

    @cached_property
    def me(self) -> "User":
        """The current user, resolved through `api.users.get_me()` on first access and cached afterwards."""
        # Imported here rather than at module level (presumably to avoid a circular import — TODO confirm).
        from argilla.users import User

        current_user_model = self.api.users.get_me()
        return User(client=self, _model=current_user_model)

    ############################
    # Private methods
    ############################

    @classmethod
    def _set_default(cls, client: "Argilla") -> None:
        """Store `client` as the default instance of Argilla."""
        cls._default_client = client

    @classmethod
    def _get_default(cls) -> "Argilla":
        """Return the default instance of Argilla, creating one with default settings if none exists yet."""
        if cls._default_client is not None:
            return cls._default_client

        cls._default_client = Argilla()
        return cls._default_client

workspaces: Workspaces property

A collection of workspaces on the server.

datasets: Datasets property

A collection of datasets on the server.

users: Users property

A collection of users on the server.

__init__(api_url=DEFAULT_HTTP_CONFIG.api_url, api_key=DEFAULT_HTTP_CONFIG.api_key, timeout=DEFAULT_HTTP_CONFIG.timeout, retries=DEFAULT_HTTP_CONFIG.retries, **http_client_args)

Inits the Argilla client.

Parameters:

Name Type Description Default
api_url Optional[str]

the URL of the Argilla API. If not provided, then the value will try to be set from ARGILLA_API_URL environment variable. Defaults to "http://localhost:6900".

api_url
api_key Optional[str]

the key to be used to authenticate in the Argilla API. If not provided, then the value will try to be set from ARGILLA_API_KEY environment variable. Defaults to None.

api_key
timeout int

the maximum time in seconds to wait for a request to the Argilla API to be completed before raising an exception. Defaults to 60.

timeout
retries int

the number of times to retry the HTTP connection to the Argilla API before raising an exception. Defaults to 5.

retries
Source code in src/argilla/client.py
def __init__(
    self,
    api_url: Optional[str] = DEFAULT_HTTP_CONFIG.api_url,
    api_key: Optional[str] = DEFAULT_HTTP_CONFIG.api_key,
    timeout: int = DEFAULT_HTTP_CONFIG.timeout,
    retries: int = DEFAULT_HTTP_CONFIG.retries,
    **http_client_args,
) -> None:
    """Create the client and register it as the default `Argilla` instance.

    Args:
        api_url: the URL of the Argilla API. If not provided, then the value will try
            to be set from `ARGILLA_API_URL` environment variable. Defaults to
            `"http://localhost:6900"`.
        api_key: the key to be used to authenticate in the Argilla API. If not provided,
            then the value will try to be set from `ARGILLA_API_KEY` environment variable.
            Defaults to `None`.
        timeout: the maximum time in seconds to wait for a request to the Argilla API
            to be completed before raising an exception. Defaults to `60`.
        retries: the number of times to retry the HTTP connection to the Argilla API
            before raising an exception. Defaults to `5`.
        **http_client_args: extra keyword arguments forwarded unchanged to the
            parent class initializer (for example `headers`).
    """
    super().__init__(
        api_url=api_url,
        api_key=api_key,
        timeout=timeout,
        retries=retries,
        **http_client_args,
    )

    # The newest client always becomes the default one.
    self._set_default(self)

SpacesDeploymentMixin

Bases: LoggingMixin

Source code in src/argilla/_helpers/_deploy.py
class SpacesDeploymentMixin(LoggingMixin):
    """Mixin that adds the ability to deploy Argilla on Hugging Face Spaces."""

    @classmethod
    def deploy_on_spaces(
        cls,
        api_key: str,
        repo_name: Optional[str] = "argilla",
        org_name: Optional[str] = None,
        hf_token: Optional[str] = None,
        space_storage: Optional[Union[str, "SpaceStorage", Literal["small", "medium", "large"]]] = None,
        space_hardware: Optional[Union[str, "SpaceHardware", Literal["cpu-basic", "cpu-upgrade"]]] = "cpu-basic",
        private: Optional[Union[bool, None]] = False,
    ) -> "Argilla":
        """Deploys Argilla on Hugging Face Spaces.

        If the Space already exists it is reused (and restarted when it is paused or
        stopped); otherwise it is created by duplicating the Space referenced by
        `_ARGILLA_SPACE_TEMPLATE_REPO`. The call then blocks, polling every
        `_SLEEP_TIME` seconds, until the Space is no longer building.

        Args:
            api_key (str): The Argilla API key to be defined for the owner user and creator of the Space.
                Must be at least 8 characters long.
            repo_name (Optional[str]): The ID of the repository where Argilla will be deployed. Defaults to "argilla".
            org_name (Optional[str]): The name of the organization where Argilla will be deployed. Defaults to None,
                in which case the user owning the Hugging Face token is used.
            hf_token (Optional[Union[str, None]]): The Hugging Face authentication token. Defaults to None,
                in which case the locally stored token is used, falling back to a login prompt.
            space_storage (Optional[Union[str, SpaceStorage]]): The persistent storage size for the space. Defaults to None without persistent storage.
            space_hardware (Optional[Union[str, SpaceHardware]]): The hardware configuration for the space. Defaults to "cpu-basic" with downtime after 48 hours of inactivity.
            private (Optional[Union[bool, None]]): Whether the space should be private. Defaults to False.

        Returns:
            Argilla: The Argilla client.

        Raises:
            ValueError: If `api_key` is shorter than 8 characters, or if the Space
                runtime reports a stage that is not one of the known building,
                running, paused or stopped stages.

        Example:
            ```Python
            import argilla as rg

            client = rg.Argilla.deploy_on_spaces(api_key="12345678")
            ```
        """
        # Validate the API key first so that an invalid key fails fast, before any
        # Hugging Face login prompt or network request is triggered.
        if len(api_key) < 8:
            raise ValueError(
                "Provided API key has invalid length. Please provide an apikey with at least 8 characters."
            )

        hf_token = cls._acquire_hf_token(ht_token=hf_token)
        hf_api = HfApi(token=hf_token)

        # Default the organization to the user that owns the token
        token_username = hf_api.whoami()["name"]
        org_name = org_name or token_username
        repo_id = f"{org_name}/{repo_name}"

        # Check if the space already exists
        if hf_api.repo_exists(repo_id=repo_id, repo_type="space"):
            if cls._is_space_stopped(hf_api.get_space_runtime(repo_id=repo_id).stage):
                hf_api.restart_space(repo_id=repo_id)
            # No secrets are sent to an existing Space; the provided API key is only
            # used by the returned client, hence the warning below.
            warnings.warn(
                f"Space {repo_id} already exists. Using provided API key. If client authentication fails, go to "
                f"https://huggingface.co/spaces/{repo_id} to login with OAuth and get the correct API key.",
                stacklevel=2,
            )
        else:
            if space_storage is None:
                cls._space_storage_warning()

            # Secrets passed to the newly created Space; only needed on creation
            secrets = [
                {"key": "API_KEY", "value": api_key, "description": "The API key of the owner user."},
                {"key": "WORKSPACE", "value": "argilla", "description": "The workspace of the space."},
            ]

            hf_api.duplicate_space(
                from_id=_ARGILLA_SPACE_TEMPLATE_REPO,
                to_id=repo_id,
                private=private,
                exist_ok=True,
                hardware=space_hardware,
                storage=space_storage,
                secrets=secrets,
            )

        # With exist_ok=True the Space exists in both branches at this point; the
        # return value is used for the repository URL shown in the log message.
        repo_url: RepoUrl = hf_api.create_repo(repo_id=repo_id, repo_type="space", exist_ok=True, space_sdk="docker")
        api_url: str = (
            f"https://{cls._sanitize_url_component(org_name)}-{cls._sanitize_url_component(repo_name)}.hf.space/"
        )
        # NOTE(review): `_log_message` is called with the class itself in the `self`
        # position because no instance exists yet — presumably an instance method
        # of `LoggingMixin`; confirm.
        cls._log_message(cls, message=f"Argilla is being deployed at: {repo_url}")
        while cls._is_building(hf_api.get_space_runtime(repo_id=repo_id).stage):
            time.sleep(_SLEEP_TIME)
            cls._log_message(cls, message=f"Deployment in progress. Waiting {_SLEEP_TIME} seconds.")

        # A private Space additionally receives the Hugging Face token as a bearer header
        headers = {}
        if private:
            headers["Authorization"] = f"Bearer {hf_token}"

        return cls(api_url=api_url, api_key=api_key, headers=headers)

    @staticmethod
    def _space_storage_warning() -> None:
        """Warn that the Space is being created without persistent storage."""
        warnings.warn(
            "No storage provided. The space will not have persistant storage so every 48 hours your data will be reset."
        )

    @classmethod
    def _acquire_hf_token(cls, ht_token: Union[str, None]) -> str:
        """Obtain the Hugging Face authentication token to deploy a space and authenticate.

        Resolution order: the explicit `ht_token` argument, then the token returned
        by `get_token()`, and finally a login prompt (`notebook_login()` in an
        interactive session, `login()` otherwise) followed by another `get_token()`.

        Args:
            ht_token: The token provided by the caller, or None to look it up.
                (The parameter name is kept as-is because callers pass it by keyword.)

        Returns:
            The resolved Hugging Face token.
        """
        if ht_token is None:
            ht_token = get_token()
        if ht_token is None:
            if cls._is_interactive():
                notebook_login()
            else:
                login()
            # NOTE(review): if the login flow does not store a token this is
            # presumably still None and is returned as-is — confirm.
            ht_token = get_token()
        return ht_token

    @classmethod
    def _is_building(cls, stage: SpaceStage) -> bool:
        """Check the current stage of the space runtime. Simplified to return True when being built.

        Args:
            stage: The stage reported by the Space runtime.

        Returns:
            True while the Space is building or starting, False when it is
            running, paused or stopped.

        Raises:
            ValueError: If the stage is none of the stages listed above.
        """
        if stage in ["RUNNING_APP_STARTING", "RUNNING_BUILDING", "BUILDING", "APP_STARTING"]:
            return True
        elif stage in ["RUNNING", "PAUSED", "STOPPED"]:
            return False
        else:
            raise ValueError(f"Space configuration is wrong and in stage: {stage}")

    @classmethod
    def _is_space_stopped(cls, stage: SpaceStage) -> bool:
        """Check the current stage of the space runtime. Simplified to return True when it can be built.

        Args:
            stage: The stage reported by the Space runtime.

        Returns:
            True when the Space is paused or stopped, False when it is running,
            building or starting.

        Raises:
            ValueError: If the stage is none of the stages listed above.
        """
        if stage in ["RUNNING", "RUNNING_APP_STARTING", "RUNNING_BUILDING", "BUILDING", "APP_STARTING"]:
            return False
        elif stage in ["PAUSED", "STOPPED"]:
            return True
        else:
            raise ValueError(f"Space configuration is wrong and in stage: {stage}")

    @staticmethod
    def _sanitize_url_component(component: str) -> str:
        """Sanitize a component of a URL by replacing non-URL compatible characters.

        Args:
            component: The raw organization or repository name.

        Returns:
            The lowercased component in which every character other than ASCII
            letters, digits and hyphens is replaced by a hyphen, with leading and
            trailing hyphens removed.
        """

        # Replace any character that's not alphanumeric or hyphen with a hyphen
        sanitized = re.sub(r"[^a-zA-Z0-9-]", "-", component)
        # Convert to lowercase
        sanitized = sanitized.lower()
        # Remove any leading or trailing hyphens
        sanitized = sanitized.strip("-")
        return sanitized

deploy_on_spaces(api_key, repo_name='argilla', org_name=None, hf_token=None, space_storage=None, space_hardware='cpu-basic', private=False) classmethod

    Deploys Argilla on Hugging Face Spaces.

    Args:
        api_key (str): The Argilla API key to be defined for the owner user and creator of the Space.
        repo_name (Optional[str]): The ID of the repository where Argilla will be deployed. Defaults to "argilla".
        org_name (Optional[str]): The name of the organization where Argilla will be deployed. Defaults to None.
        hf_token (Optional[Union[str, None]]): The Hugging Face authentication token. Defaults to None.
        space_storage (Optional[Union[str, SpaceStorage]]): The persistent storage size for the space. Defaults to None without persistent storage.
        space_hardware (Optional[Union[str, SpaceHardware]]): The hardware configuration for the space. Defaults to "cpu-basic" with downtime after 48 hours of inactivity.
        private (Optional[Union[bool, None]]): Whether the space should be private. Defaults to False.

    Returns:
        Argilla: The Argilla client.

    Example:
        ```Python
        import argilla as rg

        client = rg.Argilla.deploy_on_spaces(api_key="12345678")
        ```

Source code in src/argilla/_helpers/_deploy.py
@classmethod
def deploy_on_spaces(
    cls,
    api_key: str,
    repo_name: Optional[str] = "argilla",
    org_name: Optional[str] = None,
    hf_token: Optional[str] = None,
    space_storage: Optional[Union[str, "SpaceStorage", Literal["small", "medium", "large"]]] = None,
    space_hardware: Optional[Union[str, "SpaceHardware", Literal["cpu-basic", "cpu-upgrade"]]] = "cpu-basic",
    private: Optional[Union[bool, None]] = False,
) -> "Argilla":
    """Deploys Argilla on Hugging Face Spaces.

    If the Space already exists it is reused (and restarted when it is paused or
    stopped); otherwise it is created by duplicating the Space referenced by
    `_ARGILLA_SPACE_TEMPLATE_REPO`. The call then blocks, polling every
    `_SLEEP_TIME` seconds, until the Space is no longer building.

    Args:
        api_key (str): The Argilla API key to be defined for the owner user and creator of the Space.
            Must be at least 8 characters long.
        repo_name (Optional[str]): The ID of the repository where Argilla will be deployed. Defaults to "argilla".
        org_name (Optional[str]): The name of the organization where Argilla will be deployed. Defaults to None,
            in which case the user owning the Hugging Face token is used.
        hf_token (Optional[Union[str, None]]): The Hugging Face authentication token. Defaults to None,
            in which case the locally stored token is used, falling back to a login prompt.
        space_storage (Optional[Union[str, SpaceStorage]]): The persistent storage size for the space. Defaults to None without persistent storage.
        space_hardware (Optional[Union[str, SpaceHardware]]): The hardware configuration for the space. Defaults to "cpu-basic" with downtime after 48 hours of inactivity.
        private (Optional[Union[bool, None]]): Whether the space should be private. Defaults to False.

    Returns:
        Argilla: The Argilla client.

    Raises:
        ValueError: If `api_key` is shorter than 8 characters, or if the Space
            runtime reports a stage that is not one of the known building,
            running, paused or stopped stages.

    Example:
        ```Python
        import argilla as rg

        client = rg.Argilla.deploy_on_spaces(api_key="12345678")
        ```
    """
    # Validate the API key first so that an invalid key fails fast, before any
    # Hugging Face login prompt or network request is triggered.
    if len(api_key) < 8:
        raise ValueError(
            "Provided API key has invalid length. Please provide an apikey with at least 8 characters."
        )

    hf_token = cls._acquire_hf_token(ht_token=hf_token)
    hf_api = HfApi(token=hf_token)

    # Default the organization to the user that owns the token
    token_username = hf_api.whoami()["name"]
    org_name = org_name or token_username
    repo_id = f"{org_name}/{repo_name}"

    # Check if the space already exists
    if hf_api.repo_exists(repo_id=repo_id, repo_type="space"):
        if cls._is_space_stopped(hf_api.get_space_runtime(repo_id=repo_id).stage):
            hf_api.restart_space(repo_id=repo_id)
        # No secrets are sent to an existing Space; the provided API key is only
        # used by the returned client, hence the warning below.
        warnings.warn(
            f"Space {repo_id} already exists. Using provided API key. If client authentication fails, go to "
            f"https://huggingface.co/spaces/{repo_id} to login with OAuth and get the correct API key.",
            stacklevel=2,
        )
    else:
        if space_storage is None:
            cls._space_storage_warning()

        # Secrets passed to the newly created Space; only needed on creation
        secrets = [
            {"key": "API_KEY", "value": api_key, "description": "The API key of the owner user."},
            {"key": "WORKSPACE", "value": "argilla", "description": "The workspace of the space."},
        ]

        hf_api.duplicate_space(
            from_id=_ARGILLA_SPACE_TEMPLATE_REPO,
            to_id=repo_id,
            private=private,
            exist_ok=True,
            hardware=space_hardware,
            storage=space_storage,
            secrets=secrets,
        )

    # With exist_ok=True the Space exists in both branches at this point; the
    # return value is used for the repository URL shown in the log message.
    repo_url: RepoUrl = hf_api.create_repo(repo_id=repo_id, repo_type="space", exist_ok=True, space_sdk="docker")
    api_url: str = (
        f"https://{cls._sanitize_url_component(org_name)}-{cls._sanitize_url_component(repo_name)}.hf.space/"
    )
    # NOTE(review): `_log_message` is called with the class itself in the `self`
    # position because no instance exists yet — presumably an instance method of
    # the logging mixin; confirm.
    cls._log_message(cls, message=f"Argilla is being deployed at: {repo_url}")
    while cls._is_building(hf_api.get_space_runtime(repo_id=repo_id).stage):
        time.sleep(_SLEEP_TIME)
        cls._log_message(cls, message=f"Deployment in progress. Waiting {_SLEEP_TIME} seconds.")

    # A private Space additionally receives the Hugging Face token as a bearer header
    headers = {}
    if private:
        headers["Authorization"] = f"Bearer {hf_token}"

    return cls(api_url=api_url, api_key=api_key, headers=headers)