"""
Smarter.common.utils.rfc1034_compliance
========================================
Helpers for generating and converting RFC 1034-compliant strings.
This module provides utility functions for working with DNS-safe names and resource identifiers
that comply with RFC 1034. It includes:
- ``rfc1034_compliant_str``: Converts arbitrary strings to RFC 1034-compliant DNS labels.
- ``rfc1034_compliant_to_snake``: Converts RFC 1034-compliant names to Pythonic ``snake_case``.
**Example usage:**

.. code-block:: python

    from smarter.common.utils import rfc1034_compliant_str, rfc1034_compliant_to_snake

    label = rfc1034_compliant_str("My_LLMClient_2025")
    print(label)  # Output: my-llmclient-2025
    snake = rfc1034_compliant_to_snake(label)
    print(snake)  # Output: my_llmclient_2025
"""
import re
from functools import lru_cache
from smarter.common.exceptions import SmarterValueError
from smarter.lib import logging
# Module-level logger obtained from the project's ``smarter.lib.logging`` wrapper.
logger = logging.getLogger(__name__)
# Pre-formatted module name, used as the prefix of debug messages in this module.
logger_prefix = logging.formatted_text(__name__)
# Upper bound on the number of distinct inputs memoized by each ``lru_cache``
# decorated helper in this module.
LRU_MAXSIZE = 128
@lru_cache(maxsize=LRU_MAXSIZE)
def rfc1034_compliant_str(val) -> str:
    """
    Generate an RFC 1034-compliant name suitable for use as a DNS label or resource identifier.

    :param val: The input string to convert to RFC 1034-compliant format.
    :type val: str
    :return: A string that is:

        - lower case
        - made up only of ``a-z``, ``0-9`` and hyphens
        - free of leading and trailing hyphens
        - at most 63 characters long

    :rtype: str
    :raises SmarterValueError: If the input is not a string, or if nothing is
        left after conversion.

    .. note::

        - Underscores in the input are replaced with hyphens.
        - Any other character outside ``a-z``, ``0-9`` and ``-`` is *removed*
          (it is not replaced with a hyphen).
        - Leading and trailing hyphens are stripped, both before and after
          truncation, so the 63-character cut can never leave a trailing hyphen.
        - A leading digit is kept. RFC 1123 permits this; the original RFC 1034
          grammar requires labels to start with a letter.
        - Results are memoized with ``lru_cache``; an unhashable argument
          (for example a ``list``) is rejected by the cache layer with
          ``TypeError`` before the type check in this function runs.

    .. warning::

        This function is intended for generating DNS-safe names. It does not
        guarantee uniqueness or suitability for all RFC 1034 use cases.

    **Example usage:**

    .. code-block:: python

        from smarter.common.utils import rfc1034_compliant_str

        # Basic usage
        print(rfc1034_compliant_str("My_LLMClient_2025"))  # Output: my-llmclient-2025

        # With special characters (they are dropped, not converted)
        print(rfc1034_compliant_str("My@Bot!_Name"))  # Output: mybot-name

        # With long input
        long_name = "ThisIsAReallyLongLLMClientNameThatShouldBeTruncatedToSixtyThreeCharacters_Extra"
        print(rfc1034_compliant_str(long_name))
        # Output: thisisareallylongllmclientnamethatshouldbetruncatedtosixtythree
    """
    if not isinstance(val, str):
        raise SmarterValueError(f"Could not generate RFC 1034 compliant name from {type(val)}")

    # Lower-case first so the character filter only has to know about a-z,
    # and map underscores to the DNS word separator.
    label = val.lower().replace("_", "-")
    # Drop everything that is not a letter, digit or hyphen.
    label = re.sub(r"[^a-z0-9-]", "", label)
    # A label may not begin or end with a hyphen.
    label = label.strip("-")
    # Truncate to the 63-character label limit, then strip trailing hyphens
    # again: the cut may land directly after a hyphen that used to be interior.
    label = label[:63].rstrip("-")

    if not label:
        raise SmarterValueError("Could not generate RFC 1034 compliant name from empty string")
    return label
@lru_cache(maxsize=LRU_MAXSIZE)
def rfc1034_compliant_to_snake(val) -> str:
    """
    Turn a hyphen-separated, RFC 1034-style name into its ``snake_case`` form.

    DNS labels and resource identifiers use hyphens as word separators, while
    Python identifiers use underscores. This helper maps the former onto the
    latter by swapping every hyphen for an underscore.

    :param val: The name to convert, normally lowercase letters, digits and hyphens.
    :type val: str
    :return: ``val`` with every ``-`` replaced by ``_``.
    :rtype: str
    :raises SmarterValueError: If ``val`` is not a string.

    .. note::

        - Hyphens are the only characters touched; everything else passes through as-is.
        - The input is not checked for RFC 1034 compliance; it is assumed to be sanitized already.
        - Results are memoized with ``lru_cache``, so the debug log line is
          emitted only when the function body actually runs (a cache miss).

    .. warning::

        No other non-alphanumeric characters are converted. Anything that is
        not a hyphen is returned unchanged.

    **Example usage:**

    .. code-block:: python

        from smarter.common.utils import rfc1034_compliant_to_snake

        # Basic conversion
        print(rfc1034_compliant_to_snake("my-llm_client-2025"))
        # Output: my_llm_client_2025

        # Input with no hyphens
        print(rfc1034_compliant_to_snake("simplelabel"))
        # Output: simplelabel

        # Input with multiple hyphens
        print(rfc1034_compliant_to_snake("this-is-a-test-label"))
        # Output: this_is_a_test_label

        # Input with invalid type
        try:
            rfc1034_compliant_to_snake(12345)
        except SmarterValueError as e:
            print(e)
        # Output: Could not convert RFC 1034 compliant name from <class 'int'>
    """
    logger.debug("%s.rfc1034_compliant_to_snake()", logger_prefix)

    if isinstance(val, str):
        # Hyphen is the DNS word separator; underscore is the Python one.
        return val.replace("-", "_")

    raise SmarterValueError(f"Could not convert RFC 1034 compliant name from {type(val)}")
# Public API of this module: the names exported by a star-import.
__all__ = [
    "rfc1034_compliant_str",
    "rfc1034_compliant_to_snake",
]