Skip to content

rg.markdown

To support the use of Markdown within Argilla, we've created some helper functions that ease DataURL conversions and chat message visualizations.

media

video_to_html(file_source, file_type=None, width=None, height=None, autoplay=False, loop=False)

Convert a video file to an HTML tag with embedded base64 data.

Parameters:

Name Type Description Default
file_source Union[str, bytes]

The path to the media file or a non-b64 encoded byte string.

required
file_type Optional[str]

The type of the video file. If not provided, it will be inferred from the file extension.

None
width Optional[str]

Display width in HTML. Defaults to None.

None
height Optional[str]

Display height in HTML. Defaults to None.

None
autoplay bool

True to autoplay media. Defaults to False.

False
loop bool

True to loop media. Defaults to False.

False

Returns:

Type Description
str

The HTML tag with embedded base64 data.

Examples:

from argilla.markdown import video_to_html
html = video_to_html("my_video.mp4", width="300px", height="300px", autoplay=True, loop=True)
Source code in src/argilla/markdown/media.py
def video_to_html(
    file_source: Union[str, bytes],
    file_type: Optional[str] = None,
    width: Optional[str] = None,
    height: Optional[str] = None,
    autoplay: bool = False,
    loop: bool = False,
) -> str:
    """
    Build an HTML tag that embeds a video as base64 data.

    This is a thin wrapper that forwards every argument to `_media_to_html` with the media kind fixed to "video".

    Args:
        file_source: Path to the media file, or the raw (non-base64-encoded) bytes of the media.
        file_type: Type of the video file. When omitted, it is inferred from the file extension.
        width: Display width to use in the HTML. Defaults to None.
        height: Display height to use in the HTML. Defaults to None.
        autoplay: Set to True to autoplay the media. Defaults to False.
        loop: Set to True to loop the media. Defaults to False.

    Returns:
        The HTML tag with the embedded base64 data.

    Examples:
        ```python
        from argilla.markdown import video_to_html
        html = video_to_html("my_video.mp4", width="300px", height="300px", autoplay=True, loop=True)
        ```
    """
    media_kind = "video"
    return _media_to_html(media_kind, file_source, file_type, width, height, autoplay, loop)

audio_to_html(file_source, file_type=None, width=None, height=None, autoplay=False, loop=False)

Convert an audio file to an HTML tag with embedded base64 data.

Parameters:

Name Type Description Default
file_source Union[str, bytes]

The path to the media file or a non-b64 encoded byte string.

required
file_type Optional[str]

The type of the audio file. If not provided, it will be inferred from the file extension.

None
width Optional[str]

Display width in HTML. Defaults to None.

None
height Optional[str]

Display height in HTML. Defaults to None.

None
autoplay bool

True to autoplay media. Defaults to False.

False
loop bool

True to loop media. Defaults to False.

False

Returns:

Type Description
str

The HTML tag with embedded base64 data.

Examples:

from argilla.markdown import audio_to_html
html = audio_to_html("my_audio.mp3", width="300px", height="300px", autoplay=True, loop=True)
Source code in src/argilla/markdown/media.py
def audio_to_html(
    file_source: Union[str, bytes],
    file_type: Optional[str] = None,
    width: Optional[str] = None,
    height: Optional[str] = None,
    autoplay: bool = False,
    loop: bool = False,
) -> str:
    """
    Build an HTML tag that embeds an audio clip as base64 data.

    This is a thin wrapper that forwards every argument to `_media_to_html` with the media kind fixed to "audio".

    Args:
        file_source: Path to the media file, or the raw (non-base64-encoded) bytes of the media.
        file_type: Type of the audio file. When omitted, it is inferred from the file extension.
        width: Display width to use in the HTML. Defaults to None.
        height: Display height to use in the HTML. Defaults to None.
        autoplay: Set to True to autoplay the media. Defaults to False.
        loop: Set to True to loop the media. Defaults to False.

    Returns:
        The HTML tag with the embedded base64 data.

    Examples:
        ```python
        from argilla.markdown import audio_to_html
        html = audio_to_html("my_audio.mp3", width="300px", height="300px", autoplay=True, loop=True)
        ```
    """
    media_kind = "audio"
    return _media_to_html(media_kind, file_source, file_type, width, height, autoplay, loop)

image_to_html(file_source, file_type=None, width=None, height=None)

Convert an image file to an HTML tag with embedded base64 data.

Parameters:

Name Type Description Default
file_source Union[str, bytes]

The path to the media file or a non-b64 encoded byte string.

required
file_type Optional[str]

The type of the image file. If not provided, it will be inferred from the file extension.

None
width Optional[str]

Display width in HTML. Defaults to None.

None
height Optional[str]

Display height in HTML. Defaults to None.

None

Returns:

Type Description
str

The HTML tag with embedded base64 data.

Examples:

from argilla.markdown import image_to_html
html = image_to_html("my_image.png", width="300px", height="300px")
Source code in src/argilla/markdown/media.py
def image_to_html(
    file_source: Union[str, bytes],
    file_type: Optional[str] = None,
    width: Optional[str] = None,
    height: Optional[str] = None,
) -> str:
    """
    Build an HTML tag that embeds an image as base64 data.

    This is a thin wrapper that forwards every argument to `_media_to_html` with the media kind fixed to "image".

    Args:
        file_source: Path to the media file, or the raw (non-base64-encoded) bytes of the media.
        file_type: Type of the image file. When omitted, it is inferred from the file extension.
        width: Display width to use in the HTML. Defaults to None.
        height: Display height to use in the HTML. Defaults to None.

    Returns:
        The HTML tag with the embedded base64 data.

    Examples:
        ```python
        from argilla.markdown import image_to_html
        html = image_to_html("my_image.png", width="300px", height="300px")
        ```
    """
    media_kind = "image"
    return _media_to_html(media_kind, file_source, file_type, width, height)

pdf_to_html(file_source, width='1000px', height='1000px')

Convert a pdf file to an HTML tag with embedded data.

Parameters:

Name Type Description Default
file_source Union[str, bytes]

The path to the PDF file, a bytes object with PDF data, or a URL.

required
width Optional[str]

Display width in HTML. Defaults to "1000px".

'1000px'
height Optional[str]

Display height in HTML. Defaults to "1000px".

'1000px'

Returns:

Type Description
str

HTML string embedding the PDF.

Raises:

Type Description
ValueError

If the width or height is not a valid pixel or percentage value.

Examples:

from argilla.markdown import pdf_to_html
html = pdf_to_html("my_pdf.pdf", width="300px", height="300px")
Source code in src/argilla/markdown/media.py
def pdf_to_html(
    file_source: Union[str, bytes], width: Optional[str] = "1000px", height: Optional[str] = "1000px"
) -> str:
    """
    Convert a pdf file to an HTML tag with embedded data.

    Args:
        file_source: The path to the PDF file, a bytes object with PDF data, or a URL.
        width: Display width in HTML. Defaults to "1000px".
        height: Display height in HTML. Defaults to "1000px".

    Returns:
        HTML string embedding the PDF. An `http`/`https` URL is referenced from an `<embed>` tag (with the URL
        HTML-escaped); any other source is passed to `_get_file_data` and the returned data is inlined as a
        base64 data URL in an `<object>` tag.

    Raises:
        ValueError: If the width or height is not a valid pixel or percentage value.

    Examples:
        ```python
        from argilla.markdown import pdf_to_html
        html = pdf_to_html("my_pdf.pdf", width="300px", height="300px")
        ```
    """
    # Imported locally under an alias so this function does not depend on a module-level `html` name.
    from html import escape as _escape_attr

    if not _is_valid_dimension(width) or not _is_valid_dimension(height):
        raise ValueError("Width and height must be valid pixel (e.g., '300px') or percentage (e.g., '50%') values.")

    if isinstance(file_source, str) and urlparse(file_source).scheme in ["http", "https"]:
        # The URL is caller-supplied text placed inside a double-quoted attribute. Escape it so that
        # characters such as `"`, `<` or `&` cannot terminate the attribute or inject extra markup.
        safe_url = _escape_attr(file_source, quote=True)
        return f'<embed src="{safe_url}" type="application/pdf" width="{width}" height="{height}"></embed>'

    # Not a remote URL: obtain the PDF data and inline it as a data URL.
    file_data, _ = _get_file_data(file_source, "pdf")
    pdf_base64 = base64.b64encode(file_data).decode("utf-8")
    data_url = f"data:application/pdf;base64,{pdf_base64}"
    return f'<object id="pdf" data="{data_url}" type="application/pdf" width="{width}" height="{height}"></object>'

chat

chat_to_html(messages)

Converts a list of chat messages in the OpenAI format to HTML.

Parameters:

Name Type Description Default
messages List[Dict[str, str]]

A list of dictionaries where each dictionary represents a chat message. Each dictionary should have the keys: - "role": A string indicating the role of the sender (e.g., "user", "model", "assistant", "system"). - "content": The content of the message.

required

Returns:

Name Type Description
str str

An HTML string that represents the chat conversation.

Raises:

Type Description
ValueError

If an invalid role is passed.

Examples:

from argilla.markdown import chat_to_html
html = chat_to_html([
    {"role": "user", "content": "hello"},
    {"role": "assistant", "content": "goodbye"}
])
Source code in src/argilla/markdown/chat.py
def chat_to_html(messages: List[Dict[str, str]]) -> str:
    """
    Render a list of OpenAI-format chat messages as a single HTML string.

    Each message's content is rendered with `markdown.markdown` and wrapped in two nested `<div>` elements:
    the outer one has class "user-message" for the "user" role and "system-message" for the "model",
    "assistant" and "system" roles; the inner one has class "message-content". The concatenated messages are
    placed inside a `<body>` element, preceded by `CHAT_CSS_STYLE`.

    Args:
        messages (List[Dict[str, str]]): The chat messages, one dictionary per message.
            Each dictionary should have the keys:
                - "role": A string indicating the role of the sender (e.g., "user", "model", "assistant", "system").
                - "content": The content of the message.

    Returns:
        str: An HTML string that represents the chat conversation.

    Raises:
        ValueError: If an invalid role is passed.

    Examples:
        ```python
        from argilla.markdown import chat_to_html
        html = chat_to_html([
            {"role": "user", "content": "hello"},
            {"role": "assistant", "content": "goodbye"}
        ])
        ```
    """
    rendered_messages = []
    for entry in messages:
        role = entry["role"]
        # The content is rendered before the role is checked, so any rendering error surfaces first.
        content_html = markdown.markdown(entry["content"])

        if role == "user":
            wrapper_class = "user-message"
        elif role in ("model", "assistant", "system"):
            wrapper_class = "system-message"
        else:
            raise ValueError(f"Invalid role: {role}")

        rendered_messages.append(
            f'<div class="{wrapper_class}"><div class="message-content">{content_html}</div></div>'
        )

    chat_html = "".join(rendered_messages)
    return f"<body>{CHAT_CSS_STYLE}{chat_html}</body>"