# Source code for smarter.lib.django.models

"""Django ORM base model"""

import base64
import datetime
import json
import re
from functools import cached_property
from logging import getLogger
from typing import Any, Optional

from django.core.exceptions import ValidationError
from django.db import models
from django.db.models.query import QuerySet
from django.forms.models import model_to_dict
from django.utils.timezone import is_aware, make_aware
from taggit.managers import TaggableManager

from smarter.common.conf import smarter_settings
from smarter.common.exceptions import SmarterValueError
from smarter.common.helpers.console_helpers import formatted_text
from smarter.common.mixins import SmarterHelperMixin
from smarter.lib.cache import cache_results
from smarter.lib.django.validators import SmarterValidator
from smarter.lib.json import SmarterJSONEncoder
from smarter.lib.logging import WaffleSwitchedLoggerWrapper

# Module-scoped logger, named after this module's import path.
logger = getLogger(name=__name__)
# Module path followed by a dot. Presumably intended as a namespace for cache
# keys; it is not referenced in the visible portion of this file.
cache_prefix = __name__ + "."


# pylint: disable=W0613
def should_log_verbose(level):
    """
    Report whether verbose logging is currently enabled.

    :param level: Ignored. The argument is accepted but never read.
    :returns: The current value of ``smarter_settings.verbose_logging``.
    """
    is_verbose = smarter_settings.verbose_logging
    return is_verbose


# Logger used for the chatty debug traces in this module. It pairs ``logger``
# with the ``should_log_verbose`` predicate.
# NOTE(review): presumably the wrapper only emits when the predicate returns
# a truthy value -- confirm against smarter.lib.logging.
verbose_logger = WaffleSwitchedLoggerWrapper(logger, should_log_verbose)


def validate_no_spaces(value) -> None:
    """
    Reject any value that contains a space character.

    Only the plain space (``" "``) is checked; other whitespace such as tabs
    or newlines is not.

    :param value: The value to inspect; it must support the ``in`` operator
        with a string operand (e.g. a ``str``).
    :raises SmarterValueError: If ``value`` contains at least one space.
    """
    has_space = " " in value
    if not has_space:
        return
    raise SmarterValueError(f"Value must not contain spaces: {value}")


class TimestampedModel(models.Model, SmarterHelperMixin):
    """
    Abstract base model for all Django ORM models in the Smarter project,
    providing automatic timestamp fields and utility methods.

    This class should be used as the base class for all models in the project
    to ensure consistent tracking of creation and modification times. It adds
    ``created_at`` and ``updated_at`` fields, and provides validation and
    time-difference utilities.

    **Example Usage:**

    .. code-block:: python

        from smarter.lib.django.models import TimestampedModel

        class MyModel(TimestampedModel):
            name = models.CharField(max_length=100)

        # Creating an instance
        obj = MyModel.objects.create(name="Example")
        print(obj.created_at)  # Timestamp of creation
        print(obj.updated_at)  # Timestamp of last update

        # Checking elapsed time since last update (a property, not a method)
        seconds = obj.elapsed_updated
        print(f"Seconds since last update: {seconds}")

    **Parameters:**

    Inherits all parameters from ``django.db.models.Model``.

    .. note::

        - This class is abstract and will not create a database table by itself.
        - The ``validate()`` method is a stub and should be implemented in
          subclasses as needed.
        - The ``save()`` method enforces validation before saving, raising a
          detailed error if validation fails.

    .. important::

        - If you override ``save()``, ensure you call
          ``super().save(*args, **kwargs)`` to retain validation and timestamp
          behavior.
        - The ``elapsed_updated`` property expects ``updated_at`` to be set;
          if not, it returns ``None``.
        - The hashed ID methods provide a way to encode and decode object IDs
          for use in URLs in cases where you want to avoid exposing raw
          database IDs. The encoding is plain base64 and is trivially
          reversible: it obscures IDs, it does not protect them.
    """

    HASH_PREFIX = "r"
    HASH_SUFFIX = "x"
    HASH_FLOOR = 1000000
    _hash_regex = None
    cache_expiration = smarter_settings.cache_expiration

    # pylint: disable=missing-class-docstring
    class Meta:
        abstract = True

    created_at = models.DateTimeField(auto_now_add=True, null=True, editable=False, db_index=True)
    """
    Timestamp indicating when the model instance was created. This field is
    automatically set to the current date and time when the instance is first
    created. It is indexed in the database for efficient querying.
    """

    updated_at = models.DateTimeField(auto_now=True, null=True, editable=False, db_index=True)
    """
    Timestamp indicating when the model instance was last updated. This field
    is automatically updated to the current date and time whenever the
    instance is saved. It is indexed in the database for efficient querying.
    """

    ###########################################################################
    # public methods for internal use.
    ###########################################################################

    @classmethod
    def hash_regex(cls) -> re.Pattern:
        """
        Return a regex pattern that matches the hashed ID format for this
        model anywhere in a string.

        The hashed ID format is defined by the ``HASH_PREFIX`` and
        ``HASH_SUFFIX`` class attributes, with a URL-safe base64 string in
        between. The compiled pattern is cached per class.

        :returns: A regex pattern for matching hashed IDs.
        :rtype: re.Pattern
        """
        # Look only at this class's own namespace. Reading ``cls._hash_regex``
        # through normal attribute lookup would pick up a pattern cached on a
        # parent class, which is wrong for a subclass that overrides
        # HASH_PREFIX / HASH_SUFFIX.
        compiled = cls.__dict__.get("_hash_regex")
        if compiled is None:
            # Escape the affixes so that overriding them with regex
            # metacharacters cannot change the meaning of the pattern.
            prefix = re.escape(cls.HASH_PREFIX)
            suffix = re.escape(cls.HASH_SUFFIX)
            compiled = re.compile(f"{prefix}[A-Za-z0-9_-]+{suffix}")
            cls._hash_regex = compiled
        return compiled

    @cached_property
    def hashed_id(self) -> str:
        """
        Return a URL-friendly, obscured version of the object's ID.

        Encoding scheme:

        1. Add ``HASH_FLOOR`` to the object's ID.
        2. Convert the result to a string and encode it with URL-safe base64.
        3. Strip the ``=`` padding characters.
        4. Wrap the result in ``HASH_PREFIX`` and ``HASH_SUFFIX``.

        Example:

        .. code-block:: python

            obj = MyModel.objects.create()
            print(obj.id)         # e.g., 123
            print(obj.hashed_id)  # "rMTAwMDEyMwx"

        :returns: Hashed ID string (URL-safe, no padding).
        :rtype: str
        :raises TypeError: If the instance has no ID yet (``int(None)``).
        """
        id_value = int(self.id) + self.HASH_FLOOR
        encoded = base64.urlsafe_b64encode(str(id_value).encode()).decode().rstrip("=")
        return f"{self.HASH_PREFIX}{encoded}{self.HASH_SUFFIX}"

    @classmethod
    def id_from_hashed_id(cls, hashed_id: str) -> Optional[int]:
        """
        Decode a hashed ID back to the original object ID.

        Decoding scheme:

        1. Validate that the hashed ID starts with ``HASH_PREFIX`` and ends
           with ``HASH_SUFFIX``.
        2. Remove the prefix and suffix to isolate the base64 string.
        3. Re-add ``=`` padding so the length is a multiple of 4.
        4. Base64-decode to recover the number as a string.
        5. Convert to ``int`` and subtract ``HASH_FLOOR``.

        Example:

        .. code-block:: python

            my_record = MyModel.objects.create()
            hashed_id = my_record.hashed_id
            original_id = MyModel.id_from_hashed_id(hashed_id)
            assert original_id == my_record.id

        :param hashed_id: The hashed ID string to decode (URL-safe, no padding).
        :returns: The original object ID if decoding succeeds, otherwise
            ``None``. Every failure is logged and swallowed; this method does
            not raise.
        :rtype: Optional[int]
        """
        try:
            verbose_logger.debug(
                "%s.id_from_hashed_id() - Attempting to decode hashed_id: %s",
                cls.formatted_class_name,
                hashed_id,
            )
            if not hashed_id.startswith(cls.HASH_PREFIX) or not hashed_id.endswith(cls.HASH_SUFFIX):
                logger.warning(
                    "%s.id_from_hashed_id() - Hashed ID '%s' does not start with '%s' or end with '%s'.",
                    cls.formatted_class_name,
                    hashed_id,
                    cls.HASH_PREFIX,
                    cls.HASH_SUFFIX,
                )
                return None

            # Use an explicit end index rather than ``-len(suffix)``: a
            # negative-zero slice bound would yield an empty string if a
            # subclass sets HASH_SUFFIX to "".
            start = len(cls.HASH_PREFIX)
            end = len(hashed_id) - len(cls.HASH_SUFFIX)
            encoded_str = hashed_id[start:end]

            # Add padding if needed
            encoded_str += "=" * (-len(encoded_str) % 4)
            decoded_str = base64.urlsafe_b64decode(encoded_str.encode()).decode()
            retval = int(decoded_str) - cls.HASH_FLOOR
            verbose_logger.debug(
                "%s.id_from_hashed_id() - Successfully decoded hashed_id: %s to id: %d",
                cls.formatted_class_name,
                hashed_id,
                retval,
            )
            return retval
        # binascii.Error (bad base64), UnicodeDecodeError (non-text payload)
        # and int() failures are all ValueError subclasses.
        except ValueError as e:
            logger.error(
                "%s.id_from_hashed_id() - Failed to decode hashed_id '%s': %s",
                cls.formatted_class_name,
                hashed_id,
                e,
            )
            return None
        # pylint: disable=broad-except
        except Exception as e:
            logger.exception(
                "%s.id_from_hashed_id() - Unexpected error while decoding hashed_id '%s': %s",
                cls.formatted_class_name,
                hashed_id,
                e,
            )
            return None

    @classmethod
    def find_hash(cls, value: str) -> Optional[str]:
        """
        Find and return the first substring of ``value`` that matches the
        hashed ID format (see :meth:`hash_regex`).

        :param value: The string to search for a hashed ID.
        :returns: The first matching hashed ID if found, otherwise ``None``.
        :rtype: Optional[str]
        """
        verbose_logger.debug(
            "%s.find_hash() - Searching for hashed ID in value: %s",
            cls.formatted_class_name,
            value,
        )
        match = cls.hash_regex().search(value)
        retval = match.group(0) if match else None
        if retval:
            verbose_logger.debug(
                "%s.find_hash() - Found hashed ID: %s",
                cls.formatted_class_name,
                retval,
            )
        else:
            verbose_logger.debug(
                "%s.find_hash() - No hashed ID found in value: %s",
                cls.formatted_class_name,
                value,
            )
        return retval

    ###########################################################################
    # public methods for public use.
    ###########################################################################

    def validate(self):
        """
        Validate the model.

        .. attention::

            Intended to be overridden in subclasses to provide custom
            validation logic. The base implementation does nothing.
        """

    def save(self, *args, **kwargs):
        """
        Save the model instance to the database, calling :meth:`validate`
        first.

        If :meth:`validate` raises, the save is aborted and the error is
        re-raised as :exc:`SmarterValueError` (chained to the original) with a
        message that includes the arguments, the model class name and the
        instance's ``__dict__``.

        :param args: Positional arguments forwarded to the parent ``save()``.
        :param kwargs: Keyword arguments forwarded to the parent ``save()``.
        :raises SmarterValueError: If :meth:`validate` raises any exception.

        Example:

        .. code-block:: python

            obj = MyModel(name="Example")
            obj.save()  # Will call validate() before saving

        .. important::

            - If you override this method in a subclass, always call
              ``super().save(*args, **kwargs)`` to retain validation and
              timestamp functionality.
            - If validation fails, no data will be saved to the database.
        """
        try:
            self.validate()
        except (ValidationError, SmarterValueError) as e:
            raise SmarterValueError(
                f"TimestampedModel().save() validation error: {e} | args={args} kwargs={kwargs} | model={self.__class__.__name__} | field_values={self.__dict__}"
            ) from e
        except Exception as e:
            raise SmarterValueError(
                f"TimestampedModel().save() unexpected error during validation: {e} | args={args} kwargs={kwargs} | model={self.__class__.__name__} | field_values={self.__dict__}"
            ) from e
        super().save(*args, **kwargs)

    @cached_property
    def record_locator(self) -> str:
        """
        Return a short, URL-friendly record locator of the form
        ``<lowercased class name>-<hashed_id>``.

        Example:

        .. code-block:: python

            obj = MyModel.objects.create(name="Example")
            print(obj.id)              # e.g., 123
            print(obj.record_locator)  # "mymodel-rMTAwMDEyMwx"

        :returns: Record locator string (URL-safe, no padding).
        :rtype: str
        """
        prefix = str(self.__class__.__name__).lower()
        return f"{prefix}-{self.hashed_id}"

    @classmethod
    def get_object_by_locator(cls, locator: str) -> Optional["TimestampedModel"]:
        """
        Retrieve an object from its record locator (see :attr:`record_locator`).

        Example:

        .. code-block:: python

            obj = MyModel.objects.create()
            locator = obj.record_locator
            retrieved_obj = MyModel.get_object_by_locator(locator)
            print(retrieved_obj)  # Should be the same record as obj

        :param locator: The record locator string to decode and search for.
        :returns: The model instance if found, otherwise ``None``. Errors are
            logged and swallowed; this method does not raise.
        :rtype: Optional[TimestampedModel]
        """
        verbose_logger.debug(
            "%s.get_object_by_locator() - Attempting to retrieve object with locator: %s",
            cls.formatted_class_name,
            locator,
        )
        try:
            prefix = str(cls.__name__).lower()
            if not locator.startswith(f"{prefix}-"):
                logger.warning(
                    "%s.get_object_by_locator() - Locator '%s' does not start with expected prefix '%s-'.",
                    cls.formatted_class_name,
                    locator,
                    prefix,
                )
                return None

            # Drop "<prefix>-", then tolerate leading zeros before the hash.
            hashed_part = locator[len(prefix) + 1 :].lstrip("0")
            id_value = cls.id_from_hashed_id(hashed_part)
            if id_value is None:
                logger.warning(
                    "%s.get_object_by_locator() - Failed to decode hashed part '%s' from locator '%s'.",
                    cls.formatted_class_name,
                    hashed_part,
                    locator,
                )
                return None

            obj = cls.get_cached_object(pk=id_value)
            if obj is None:
                logger.warning(
                    "%s.get_object_by_locator() - No object found with ID %d decoded from locator '%s'.",
                    cls.formatted_class_name,
                    id_value,
                    locator,
                )
            else:
                verbose_logger.debug(
                    "%s.get_object_by_locator() - Successfully retrieved object with ID %d from locator '%s'.",
                    cls.formatted_class_name,
                    id_value,
                    locator,
                )
            return obj  # type: ignore[return-value]
        # pylint: disable=broad-except
        except Exception as e:
            logger.exception(
                "%s.get_object_by_locator() - Unexpected error while retrieving object with locator '%s': %s",
                cls.formatted_class_name,
                locator,
                e,
            )
            return None

    @property
    def elapsed_updated(self, dt=None) -> Optional[int]:
        """
        Absolute number of whole seconds between now and the model's
        ``updated_at`` timestamp.

        This is a **property**: read it as ``obj.elapsed_updated``. Attribute
        access always evaluates with ``dt=None`` (i.e. "now"). Writing
        ``obj.elapsed_updated(some_dt)`` does *not* pass ``some_dt``; it tries
        to call the returned ``int`` and raises ``TypeError``. The ``dt``
        parameter is kept for backward compatibility and is reachable only
        through the underlying getter, e.g.
        ``type(obj).elapsed_updated.fget(obj, some_dt)``.

        :param dt: Reference datetime to compare against ``updated_at``.
            Defaults to the current time (UTC-aware if ``updated_at`` is
            aware, naive local time otherwise).
        :returns: The absolute difference in seconds, or ``None`` if
            ``updated_at`` is not set.
        :rtype: Optional[int]
        :raises TypeError: If ``dt`` is supplied and is not a
            ``datetime.datetime``.

        **Example Usage:**

        .. code-block:: python

            obj = MyModel.objects.get(pk=1)
            seconds = obj.elapsed_updated
            print(f"Seconds since last update: {seconds}")

        .. note::

            When exactly one of ``updated_at`` / ``dt`` is naive, the naive
            value is interpreted as UTC before subtracting.
        """
        utc = datetime.timezone.utc
        if not self.updated_at:
            return None

        if dt is None:
            dt = datetime.datetime.now(utc) if is_aware(self.updated_at) else datetime.datetime.now()

        if not isinstance(dt, datetime.datetime):
            raise TypeError(f"Expected a datetime object, got {type(dt)} instead.")

        updated = self.updated_at
        # Aware and naive datetimes cannot be subtracted; promote the naive one.
        if is_aware(updated) and not is_aware(dt):
            dt = make_aware(dt, utc)
        elif not is_aware(updated) and is_aware(dt):
            updated = make_aware(updated, utc)

        return int(abs((updated - dt).total_seconds()))

    def to_json(self) -> dict[str, Any]:
        """
        Serialize the model instance to a JSON-compatible dictionary.

        The dictionary is built from ``model_to_dict(self)`` plus
        ``record_locator`` and ``elapsed_updated``, then round-tripped through
        ``json.dumps`` / ``json.loads`` using ``SmarterJSONEncoder`` so every
        value is a plain JSON type.

        :returns: A dictionary representation of the model instance suitable
            for JSON serialization.
        :rtype: dict[str, Any]
        :raises SmarterValueError: If serialization fails for any reason.
        """
        try:
            data = model_to_dict(self)
            data["record_locator"] = self.record_locator
            data["elapsed_updated"] = self.elapsed_updated
            return json.loads(json.dumps(data, cls=SmarterJSONEncoder))
        except Exception as e:
            logger.exception(
                "%s.to_json() - Error serializing model to JSON. model=%s, field_values=%s, exception: %s",
                self.formatted_class_name,
                self.__class__.__name__,
                self.__dict__,
                e,
            )
            raise SmarterValueError(f"Error serializing model to JSON: {e}") from e

    @classmethod
    def get_cached_object(cls, invalidate: Optional[bool] = False, pk: Optional[int] = None) -> Optional[models.Model]:
        """
        Retrieve a model instance by primary key, using caching to optimize
        performance.

        This method is selectively overridden in models that inherit from
        TimestampedModel to provide class-specific function parameters.

        Example usage:

        .. code-block:: python

            # Retrieve by primary key
            instance = MyModel.get_cached_object(pk=1)

        :param invalidate: Whether to invalidate the cache for this retrieval.
        :type invalidate: bool, optional
        :param pk: The primary key of the model instance to retrieve.
        :type pk: int, optional
        :returns: The model instance if found, otherwise ``None`` (also when
            ``pk`` is falsy).
        :rtype: Optional[models.Model]
        :raises NotImplementedError: If called on an abstract model class.
        """
        logger_prefix = formatted_text(__name__ + "." + TimestampedModel.__name__ + ".get_cached_object()")
        verbose_logger.debug("%s.get_cached_object() called with pk: %s, invalidate=%s", logger_prefix, pk, invalidate)

        if cls._meta.abstract:
            raise NotImplementedError(
                "get_cached_object() must be called on a concrete model class, not an abstract base class."
            )

        @cache_results(timeout=cls.cache_expiration)
        def _get_model_by_pk(pk: int, class_name: str = cls.__name__) -> Optional[models.Model]:
            try:
                verbose_logger.debug(
                    "%s._get_model_by_pk() cache miss for %s pk: %s",
                    logger_prefix,
                    class_name,
                    pk,
                )
                return cls.objects.get(pk=pk)
            except cls.DoesNotExist:
                verbose_logger.debug(
                    "%s._get_model_by_pk() no object found for %s pk: %s",
                    logger_prefix,
                    class_name,
                    pk,
                )
                return None

        # Nothing to look up (or invalidate) without a primary key.
        if not pk:
            verbose_logger.debug("%s._get_model_by_pk() called with no pk", logger_prefix)
            return None

        # Pass identical positional arguments to invalidate() and to the call
        # itself, so both refer to the same cache entry however cache_results
        # derives its key from the arguments.
        if invalidate:
            _get_model_by_pk.invalidate(pk, cls.__name__)

        return _get_model_by_pk(pk, cls.__name__)

    @classmethod
    def get_cached_objects(cls, invalidate: Optional[bool] = False) -> QuerySet["TimestampedModel"]:
        """
        Retrieve model instances using caching to optimize performance.

        This method is selectively overridden in models that inherit from
        TimestampedModel to provide class-specific function parameters.

        Example usage:

        .. code-block:: python

            # Retrieve all instances
            instances = MyModel.get_cached_objects()

        :param invalidate: Whether to invalidate the cache for this retrieval.
        :type invalidate: bool
        :returns: A queryset of all model instances.
        :rtype: QuerySet
        :raises NotImplementedError: If called on an abstract model class.
        """
        logger_prefix = formatted_text(__name__ + "." + cls.__name__ + ".get_cached_objects()")
        verbose_logger.debug("%s.get_cached_objects() called with invalidate=%s", logger_prefix, invalidate)

        if cls._meta.abstract:
            raise NotImplementedError(
                "get_cached_objects() must be called on a concrete model class, not an abstract base class."
            )

        @cache_results(timeout=cls.cache_expiration)
        def _get_all_models(class_name: str = cls.__name__) -> QuerySet["TimestampedModel"]:
            return cls.objects.all()

        if invalidate:
            _get_all_models.invalidate(cls.__name__)

        # Pass the class name explicitly (same value as the default) so the
        # call carries the same argument as invalidate() above and so that
        # different models do not present an identical, argument-less call to
        # the cache. NOTE(review): assumes cache_results keys on the passed
        # arguments -- confirm in smarter.lib.cache.
        return _get_all_models(cls.__name__)

    def __str__(self):
        return f"{self.__class__.__name__}(id={getattr(self, 'id', None)})"

    def __repr__(self):
        return f"<{self.__class__.__name__} id={getattr(self, 'id', None)} created_at={self.created_at} updated_at={self.updated_at}>"
class MetaDataModel(TimestampedModel):
    """
    Abstract base model that adds SAM metadata fields to a TimestampedModel
    Django ORM model.

    These are the common fields that make up the Pydantic SAM metadata model,
    along with timestamp fields for create/modify tracking.

    **Example Usage:**

    .. code-block:: python

        from smarter.lib.django.models import MetaDataModel
        from smarter.apps.account.models import User

        class MyModel(MetaDataModel):
            name = models.CharField(max_length=100)
    """

    # pylint: disable=missing-class-docstring
    class Meta:
        abstract = True

    # NOTE(review): the help text advertises camelCase while the attached
    # validator is named validate_snake_case -- confirm which convention is
    # intended. The help text is left untouched here because changing it
    # alters the field definition seen by migrations.
    name = models.CharField(
        max_length=255,
        help_text="Name in camelCase, e.g., 'apiKey', no special characters.",
        validators=[SmarterValidator.validate_snake_case, validate_no_spaces],
    )
    description = models.TextField(
        help_text="A brief description of this resource. Be verbose, but not too verbose.",
        blank=True,
        null=True,
        default="",
    )
    version = models.CharField(
        max_length=255,
        default="1.0.0",
        help_text="Semantic version in the format MAJOR.MINOR.PATCH, e.g., '1.0.0'.",
        blank=True,
        null=True,
    )
    tags = TaggableManager(
        blank=True,
        help_text="Tags for categorizing and organizing this resource.",
    )
    # NOTE(review): the default is an empty list although the help text
    # describes key-value pairs -- confirm the intended container type.
    annotations = models.JSONField(
        default=list,
        blank=True,
        null=True,
        help_text="Key-value pairs for annotating this resource.",
        encoder=SmarterJSONEncoder,
    )

    def validate(self):
        """
        Validate the model.

        Runs the parent validation, then checks that ``version`` (when set)
        is a valid semantic version.

        :raises SmarterValueError: If ``version`` is set and is not a valid
            semantic version (MAJOR.MINOR.PATCH).
        """
        super().validate()

        # version should be a semantic version: MAJOR.MINOR.PATCH
        if self.version and not SmarterValidator.is_valid_semantic_version(self.version):
            raise SmarterValueError(f"Version '{self.version}' is not a valid semantic version (MAJOR.MINOR.PATCH).")

    @cached_property
    def tags_list(self) -> list[str]:
        """
        Return the tags as a list of strings.

        Two cache layers are used: ``@cached_property`` memoizes the list on
        this instance, while ``@cache_results`` wraps the actual lookup,
        keyed by class name and primary key, with a timeout of
        ``cache_expiration``.

        :returns: List of tag names.
        :rtype: list[str]
        """

        # pylint: disable=W0613
        @cache_results(timeout=self.cache_expiration)
        def _get_tags_by_class_and_pk(cls_name: str, pk: int) -> list[str]:
            """
            Helper to cache tags retrieval. The arguments are not read for
            the query itself; they are passed so that each class/pk pair is
            a distinct call.
            """
            # %s rather than %d: pk is None on an unsaved instance, and %d
            # would make the logging call itself fail to format.
            verbose_logger.debug(
                "%s.tags_list - Retrieving tags for %s with pk=%s from database",
                self.formatted_class_name,
                cls_name,
                pk,
            )
            return [tag.name for tag in self.tags.all()]

        return _get_tags_by_class_and_pk(self.__class__.__name__, self.pk)

    @classmethod
    def get_cached_object(
        cls, invalidate: Optional[bool] = False, pk: Optional[int] = None, name: Optional[str] = None
    ) -> Optional["MetaDataModel"]:
        """
        Retrieve a model instance by primary key or name, using caching to
        optimize performance.

        When both ``name`` and ``pk`` are given, the lookup is by ``name``.

        This method is selectively overridden in models that inherit from
        MetaDataModel to provide class-specific function parameters.

        Example usage:

        .. code-block:: python

            # Retrieve by primary key
            instance = MyModel.get_cached_object(pk=1)

            # Retrieve by name
            instance = MyModel.get_cached_object(name="exampleName")

        :param invalidate: Whether to invalidate the cache for this retrieval.
        :type invalidate: bool, optional
        :param pk: The primary key of the model instance to retrieve.
        :type pk: int, optional
        :param name: The name of the model instance to retrieve.
        :type name: str, optional
        :returns: The model instance if found, otherwise ``None`` (also when
            neither ``pk`` nor ``name`` is given).
        :rtype: Optional["MetaDataModel"]
        :raises NotImplementedError: If called on an abstract model class.
        :raises SmarterValueError: If more than one object has the given name.
        """
        logger_prefix = formatted_text(__name__ + "." + MetaDataModel.__name__ + ".get_cached_object()")
        verbose_logger.debug(
            "%s.get_cached_object() called with pk: %s, name: %s, invalidate: %s", logger_prefix, pk, name, invalidate
        )

        if cls._meta.abstract:
            raise NotImplementedError(
                "get_cached_object() must be called on a concrete model class, not an abstract base class."
            )

        if not pk and not name:
            return None

        @cache_results(timeout=cls.cache_expiration)
        def _get_object_by_name(name: str, class_name: str = cls.__name__) -> Optional["MetaDataModel"]:
            try:
                verbose_logger.debug(
                    "%s._get_object_by_name() cache miss for %s name: %s",
                    logger_prefix,
                    class_name,
                    name,
                )
                return cls.objects.prefetch_related("tags").get(name=name)
            except cls.DoesNotExist:
                verbose_logger.debug(
                    "%s._get_object_by_name() no %s object found for name: %s",
                    logger_prefix,
                    class_name,
                    name,
                )
                return None
            except cls.MultipleObjectsReturned as e:
                # The lookup is ambiguous; this is reported as an error
                # rather than resolved by picking one of the matches.
                logger.error(
                    "%s.get_cached_object() - Multiple %s objects found for name '%s'.",
                    logger_prefix,
                    class_name,
                    name,
                )
                raise SmarterValueError(f"Multiple {class_name} objects found for name '{name}'.") from e

        if name:
            # Invalidate and call with identical positional arguments so both
            # refer to the same cache entry, and only when there is a name to
            # invalidate.
            if invalidate:
                _get_object_by_name.invalidate(name, cls.__name__)
            return _get_object_by_name(name, cls.__name__)

        return super().get_cached_object(invalidate=invalidate, pk=pk)  # type: ignore[return-value]

    @classmethod
    def get_cached_objects(cls, invalidate: Optional[bool] = False) -> QuerySet["MetaDataModel"]:
        """
        Retrieve model instances using caching to optimize performance.

        Logs the call, rejects abstract classes, and delegates to the parent
        implementation.

        This method is selectively overridden in models that inherit from
        MetaDataModel to provide class-specific function parameters.

        Example usage:

        .. code-block:: python

            # Retrieve all instances
            instances = MyModel.get_cached_objects()

        :param invalidate: Whether to invalidate the cache for this retrieval.
        :type invalidate: bool
        :returns: A queryset of all model instances.
        :rtype: QuerySet
        :raises NotImplementedError: If called on an abstract model class.
        """
        logger_prefix = formatted_text(__name__ + "." + cls.__name__ + ".get_cached_objects()")
        verbose_logger.debug(
            "%s.get_cached_objects() called for %s with invalidate=%s", logger_prefix, cls.__name__, invalidate
        )

        if cls._meta.abstract:
            raise NotImplementedError(
                "get_cached_objects() must be called on a concrete model class, not an abstract base class."
            )

        return super().get_cached_objects(invalidate=invalidate)  # type: ignore