"""Django ORM base model"""
import base64
import datetime
import json
import re
from functools import cached_property
from logging import getLogger
from typing import Any, Optional
from django.core.exceptions import ValidationError
from django.db import models
from django.db.models.query import QuerySet
from django.forms.models import model_to_dict
from django.utils.timezone import is_aware, make_aware
from taggit.managers import TaggableManager
from smarter.common.conf import smarter_settings
from smarter.common.exceptions import SmarterValueError
from smarter.common.helpers.console_helpers import formatted_text
from smarter.common.mixins import SmarterHelperMixin
from smarter.lib.cache import cache_results
from smarter.lib.django.validators import SmarterValidator
from smarter.lib.json import SmarterJSONEncoder
from smarter.lib.logging import WaffleSwitchedLoggerWrapper
logger = getLogger(__name__)
cache_prefix = f"{__name__}."
# pylint: disable=W0613
def should_log_verbose(level):
    """
    Predicate handed to the logger wrapper to decide whether verbose output is emitted.

    :param level: Log level supplied by the caller; it is not consulted here.
    :returns: The current value of ``smarter_settings.verbose_logging``.
    """
    verbose_enabled = smarter_settings.verbose_logging
    return verbose_enabled
verbose_logger = WaffleSwitchedLoggerWrapper(logger, should_log_verbose)
def validate_no_spaces(value) -> None:
    """
    Field validator that rejects any value containing a space character.

    :param value: The value to check (membership of ``" "`` is tested).
    :raises SmarterValueError: If ``value`` contains at least one space.
    """
    if " " not in value:
        return
    raise SmarterValueError(f"Value must not contain spaces: {value}")
[docs]
class TimestampedModel(models.Model, SmarterHelperMixin):
    """
    Abstract base model for all Django ORM models in the Smarter project, providing automatic
    timestamp fields and utility methods.

    This class should be used as the base class for all models in the project to ensure
    consistent tracking of creation and modification times. It adds ``created_at`` and
    ``updated_at`` fields, and provides validation and time-difference utilities.

    **Example Usage:**

    .. code-block:: python

        from smarter.smarter.lib.django.models import TimestampedModel

        class MyModel(TimestampedModel):
            name = models.CharField(max_length=100)

        # Creating an instance
        obj = MyModel.objects.create(name="Example")
        print(obj.created_at)  # Timestamp of creation
        print(obj.updated_at)  # Timestamp of last update

        # Checking elapsed time since last update (a property: no call parentheses)
        seconds = obj.elapsed_updated
        print(f"Seconds since last update: {seconds}")

    **Parameters:**
        Inherits all parameters from ``django.db.models.Model``.

    .. note::
        - This class is abstract and will not create a database table by itself.
        - The ``validate()`` method is a stub and should be implemented in subclasses as needed.
        - The ``save()`` method enforces validation before saving, raising a detailed error if validation fails.

    .. important::
        - If you override ``save()``, ensure you call ``super().save(*args, **kwargs)`` to retain validation and timestamp behavior.
        - The ``elapsed_updated`` property expects ``updated_at`` to be set; if not, it returns ``None``.
        - ``elapsed_updated`` is a property, so it is read as ``obj.elapsed_updated`` and cannot be
          called with an argument.
        - The hashed ID methods provide a way to encode and decode object IDs for use in URLs
          in cases where you want to avoid exposing raw database IDs.
    """

    # Literal strings placed before and after the base64 payload of a hashed ID.
    HASH_PREFIX = "r"
    HASH_SUFFIX = "x"
    # Constant added to the object's ID before it is encoded (and subtracted again on decode).
    HASH_FLOOR = 1000000
    # Lazily compiled pattern for hashed IDs; populated by hash_regex().
    _hash_regex = None
    # Timeout passed to cache_results() by the get_cached_object(s) helpers.
    cache_expiration = smarter_settings.cache_expiration

    # pylint: disable=missing-class-docstring
    class Meta:
        abstract = True

    created_at = models.DateTimeField(auto_now_add=True, null=True, editable=False, db_index=True)
    """
    Timestamp indicating when the model instance was created.
    This field is automatically set to the current date and time when the instance is first created.
    It is indexed in the database for efficient querying.
    """
    updated_at = models.DateTimeField(auto_now=True, null=True, editable=False, db_index=True)
    """
    Timestamp indicating when the model instance was last updated.
    This field is automatically updated to the current date and time whenever the instance is saved.
    It is indexed in the database for efficient querying.
    """
###########################################################################
# public methods for internal use.
###########################################################################
[docs]
@classmethod
def hash_regex(cls) -> re.Pattern:
    """
    Return a compiled regex that matches this model's hashed ID format anywhere in a string.

    The pattern is ``HASH_PREFIX``, one or more URL-safe base64 characters
    (``A-Z a-z 0-9 _ -``), then ``HASH_SUFFIX``. It can be used to validate or extract
    hashed IDs from strings, including when embedded in URLs.

    The compiled pattern is cached per class. The cache is looked up in the class's own
    namespace only, so a subclass that overrides ``HASH_PREFIX`` or ``HASH_SUFFIX`` gets
    its own pattern instead of silently reusing one compiled for a parent class.

    :returns: A regex pattern for matching hashed IDs.
    :rtype: re.Pattern
    """
    # cls.__dict__ (not attribute lookup) so an inherited, already-compiled pattern
    # from a base class is never mistaken for this class's own pattern.
    pattern = cls.__dict__.get("_hash_regex")
    if pattern is None:
        # Escape the delimiters so that regex metacharacters in an overridden
        # prefix/suffix are matched literally.
        prefix = re.escape(cls.HASH_PREFIX)
        suffix = re.escape(cls.HASH_SUFFIX)
        pattern = re.compile(f"{prefix}[A-Za-z0-9_-]+{suffix}")
        cls._hash_regex = pattern
    return pattern
[docs]
@cached_property
def hashed_id(self) -> str:
    """
    URL-friendly, obscured representation of this object's ID.

    Steps:
        1. Convert ``self.id`` to ``int`` and add ``HASH_FLOOR``.
        2. Render the sum as a decimal string and encode it with URL-safe base64.
        3. Strip the trailing ``=`` padding.
        4. Wrap the result in ``HASH_PREFIX`` / ``HASH_SUFFIX``.

    Example (with the default prefix, suffix and floor):

    .. code-block:: python

        obj = MyModel.objects.create()
        print(obj.id)         # e.g., 123
        print(obj.hashed_id)  # "rMTAwMDEyMwx"

    :returns: Hashed ID string (URL-safe, no padding)
    :rtype: str
    """
    offset_id = int(self.id)
    offset_id += self.HASH_FLOOR
    token_bytes = base64.urlsafe_b64encode(str(offset_id).encode())
    token = token_bytes.decode().rstrip("=")
    return f"{self.HASH_PREFIX}{token}{self.HASH_SUFFIX}"
[docs]
@classmethod
def id_from_hashed_id(cls, hashed_id: str) -> Optional[int]:
    """
    Decode a hashed ID back to the original object ID.

    Decoding scheme:
        1. Check that the value starts with ``HASH_PREFIX`` and ends with ``HASH_SUFFIX``.
        2. Remove the prefix and suffix to isolate the base64 payload.
        3. Re-add ``=`` padding so the payload length is a multiple of 4.
        4. Base64-decode the payload and require it to be a plain ASCII decimal number
           (``int()`` alone would also accept whitespace, signs and underscores, letting
           several different strings decode to the same ID).
        5. Subtract ``HASH_FLOOR``; a negative result is rejected.

    Example (with the default prefix, suffix and floor):

    .. code-block:: python

        my_record = MyModel.objects.create()
        print(my_record.id)              # e.g., 123
        hashed_id = my_record.hashed_id  # "rMTAwMDEyMwx"
        original_id = MyModel.id_from_hashed_id(hashed_id)
        print(original_id)               # 123

    :param hashed_id: The hashed ID string to decode (URL-safe, no padding).
    :returns: The original object ID if decoding is successful, otherwise None.
        Failures are logged, never raised.
    :rtype: Optional[int]
    """
    try:
        verbose_logger.debug(
            "%s.id_from_hashed_id() - Attempting to decode hashed_id: %s",
            cls.formatted_class_name,
            hashed_id,
        )
        if not hashed_id.startswith(cls.HASH_PREFIX) or not hashed_id.endswith(cls.HASH_SUFFIX):
            logger.warning(
                "%s.id_from_hashed_id() - Hashed ID '%s' does not start with '%s' or end with '%s'.",
                cls.formatted_class_name,
                hashed_id,
                cls.HASH_PREFIX,
                cls.HASH_SUFFIX,
            )
            return None
        # Explicit end index: a "-len(suffix)" slice would yield "" for an empty suffix.
        encoded_str = hashed_id[len(cls.HASH_PREFIX) : len(hashed_id) - len(cls.HASH_SUFFIX)]
        # Add padding if needed
        encoded_str += "=" * (-len(encoded_str) % 4)
        decoded_str = base64.urlsafe_b64decode(encoded_str.encode()).decode()
        if not (decoded_str.isascii() and decoded_str.isdigit()):
            raise ValueError(f"decoded payload {decoded_str!r} is not a plain decimal number")
        retval = int(decoded_str) - cls.HASH_FLOOR
        if retval < 0:
            raise ValueError(f"decoded value {decoded_str} is below HASH_FLOOR")
        verbose_logger.debug(
            "%s.id_from_hashed_id() - Successfully decoded hashed_id: %s to id: %d",
            cls.formatted_class_name,
            hashed_id,
            retval,
        )
        return retval
    # binascii.Error and UnicodeDecodeError are both ValueError subclasses, so this
    # single clause covers malformed base64, undecodable bytes and bad numbers.
    except ValueError as e:
        logger.error(
            "%s.id_from_hashed_id() - Failed to decode hashed_id '%s': %s",
            cls.formatted_class_name,
            hashed_id,
            e,
        )
        return None
    # pylint: disable=broad-except
    except Exception as e:
        logger.exception(
            "%s.id_from_hashed_id() - Unexpected error while decoding hashed_id '%s': %s",
            cls.formatted_class_name,
            hashed_id,
            e,
        )
        return None
[docs]
@classmethod
def find_hash(cls, value: str) -> Optional[str]:
    """
    Locate the first substring of ``value`` that has the hashed ID shape.

    The search uses the pattern returned by ``hash_regex()``.

    :param value: The string to search for a hashed ID.
    :returns: The first matching hashed ID if found, otherwise None.
    :rtype: Optional[str]
    """
    verbose_logger.debug(
        "%s.find_hash() - Searching for hashed ID in value: %s",
        cls.formatted_class_name,
        value,
    )
    found = cls.hash_regex().search(value)
    token = found.group(0) if found else None
    if not token:
        verbose_logger.debug(
            "%s.find_hash() - No hashed ID found in value: %s",
            cls.formatted_class_name,
            value,
        )
        return token
    verbose_logger.debug(
        "%s.find_hash() - Found hashed ID: %s",
        cls.formatted_class_name,
        token,
    )
    return token
###########################################################################
# public methods for public use.
###########################################################################
[docs]
def validate(self):
    """
    Validation hook invoked by ``save()``; the base implementation does nothing.

    .. attention::
        Intended to be overridden in subclasses to provide custom validation logic.
    """
    return None
[docs]
def save(self, *args, **kwargs):
    """
    Run :meth:`validate` and, if it succeeds, persist the instance via the parent ``save()``.

    Any exception raised by :meth:`validate` aborts the save and is re-raised as a
    ``SmarterValueError`` chained to the original exception. The message includes the
    original error, the arguments, the model class name and the instance ``__dict__``.
    A ``ValidationError`` / ``SmarterValueError`` is reported as a "validation error";
    anything else as an "unexpected error during validation".

    Parameters
    ----------
    *args
        Positional arguments forwarded unchanged to the parent ``save()``.
    **kwargs
        Keyword arguments forwarded unchanged to the parent ``save()``.

    Examples
    --------
    .. code-block:: python

        obj = MyModel(name="Example")
        obj.save()  # Will call validate() before saving

    .. important::
        - If you override this method in a subclass, always call ``super().save(*args, **kwargs)``
          to retain the validation step.
        - If validation fails, the parent ``save()`` is never reached.

    :raises SmarterValueError: If :meth:`validate` raises any exception.
    """
    try:
        self.validate()
    except Exception as e:
        if isinstance(e, (ValidationError, SmarterValueError)):
            raise SmarterValueError(
                f"TimestampedModel().save() validation error: {e} | args={args} kwargs={kwargs} | model={self.__class__.__name__} | field_values={self.__dict__}"
            ) from e
        raise SmarterValueError(
            f"TimestampedModel().save() unexpected error during validation: {e} | args={args} kwargs={kwargs} | model={self.__class__.__name__} | field_values={self.__dict__}"
        ) from e
    super().save(*args, **kwargs)
[docs]
@cached_property
def record_locator(self) -> str:
    """
    Short, URL-friendly locator: the lower-cased class name, a dash, then ``hashed_id``.

    Example (with the default hash prefix, suffix and floor):

    .. code-block:: python

        obj = MyModel.objects.create(name="Example")
        print(obj.id)              # e.g., 123
        print(obj.record_locator)  # "mymodel-rMTAwMDEyMwx"

    :returns: Record locator string (URL-safe, no padding)
    :rtype: str
    """
    model_slug = self.__class__.__name__.lower()
    token = self.hashed_id
    return f"{model_slug}-{token}"
[docs]
@classmethod
def get_object_by_locator(cls, locator: str) -> Optional["TimestampedModel"]:
    """
    Resolve a record locator (``"<classname>-<hashed id>"``) to a model instance.

    The locator must start with the lower-cased class name and a dash. The remainder,
    with leading ``"0"`` characters stripped, is decoded with ``id_from_hashed_id()`` and
    the instance is fetched through ``get_cached_object(pk=...)``. Every failure path is
    logged and yields ``None``; no exception escapes this method.

    Example:

    .. code-block:: python

        obj = MyModel.objects.create()
        locator = obj.record_locator  # e.g., "mymodel-rMTAwMDEyMwx"
        retrieved_obj = MyModel.get_object_by_locator(locator)
        print(retrieved_obj)          # Should be the same as obj

    :param locator: The record locator string to decode and search for.
    :returns: The model instance if found, otherwise None.
    :rtype: Optional[TimestampedModel]
    """
    verbose_logger.debug(
        "%s.get_object_by_locator() - Attempting to retrieve object with locator: %s",
        cls.formatted_class_name,
        locator,
    )
    try:
        model_slug = cls.__name__.lower()
        head = model_slug + "-"
        if not locator.startswith(head):
            logger.warning(
                "%s.get_object_by_locator() - Locator '%s' does not start with expected prefix '%s-'.",
                cls.formatted_class_name,
                locator,
                model_slug,
            )
            return None

        token = locator[len(head) :].lstrip("0")
        decoded_pk = cls.id_from_hashed_id(token)
        if decoded_pk is None:
            logger.warning(
                "%s.get_object_by_locator() - Failed to decode hashed part '%s' from locator '%s'.",
                cls.formatted_class_name,
                token,
                locator,
            )
            return None

        instance = cls.get_cached_object(pk=decoded_pk)
        if instance is not None:
            verbose_logger.debug(
                "%s.get_object_by_locator() - Successfully retrieved object with ID %d from locator '%s'.",
                cls.formatted_class_name,
                decoded_pk,
                locator,
            )
        else:
            logger.warning(
                "%s.get_object_by_locator() - No object found with ID %d decoded from locator '%s'.",
                cls.formatted_class_name,
                decoded_pk,
                locator,
            )
        return instance  # type: ignore[return-value]
    # pylint: disable=broad-except
    except Exception as e:
        logger.exception(
            "%s.get_object_by_locator() - Unexpected error while retrieving object with locator '%s': %s",
            cls.formatted_class_name,
            locator,
            e,
        )
        return None
@property
def elapsed_updated(self, dt=None) -> Optional[int]:
    """
    Absolute number of whole seconds between ``updated_at`` and the current time.

    This is a **property**: read it as ``obj.elapsed_updated``. Attribute access never
    supplies ``dt``, so the reference time is always "now". Writing
    ``obj.elapsed_updated(some_dt)`` does not work, because it would try to call the
    returned ``int``. The ``dt`` parameter is kept only for backward compatibility of the
    getter's signature.

    **Returns:**
        - int or None:
          The absolute difference in seconds between ``updated_at`` and the reference time.
          Returns ``None`` if ``updated_at`` is not set.

    **Example Usage:**

    .. code-block:: python

        obj = MyModel.objects.get(pk=1)
        seconds = obj.elapsed_updated
        print(f"Seconds since last update: {seconds}")

    .. note::
        - "Now" is taken in UTC when ``updated_at`` is timezone-aware, and as a naive local
          ``datetime.now()`` when it is naive.
        - If exactly one of the two datetimes is naive, it is made aware as UTC before
          subtracting.

    :raises TypeError: If a non-``None`` ``dt`` that is not a ``datetime.datetime`` reaches the getter.
    """
    updated = self.updated_at
    if not updated:
        return None

    utc = datetime.timezone.utc
    if dt is None:
        # Match the awareness of updated_at so the subtraction below is valid.
        dt = datetime.datetime.now(utc) if is_aware(updated) else datetime.datetime.now()
    elif not isinstance(dt, datetime.datetime):
        raise TypeError(f"Expected a datetime object, got {type(dt)} instead.")

    # Aware and naive datetimes cannot be subtracted; promote the naive one to UTC.
    if is_aware(updated) and not is_aware(dt):
        dt = make_aware(dt, utc)
    elif not is_aware(updated) and is_aware(dt):
        updated = make_aware(updated, utc)

    return int(abs((updated - dt).total_seconds()))
[docs]
def to_json(self) -> dict[str, Any]:
    """
    Build a JSON-compatible dictionary describing this instance.

    The field values come from ``model_to_dict(self)``; ``record_locator`` and
    ``elapsed_updated`` are added on top. The result is round-tripped through
    ``json.dumps(..., cls=SmarterJSONEncoder)`` / ``json.loads`` so that only JSON-native
    types remain.

    NOTE(review): Django's ``model_to_dict`` skips fields declared ``editable=False``, so
    ``created_at`` / ``updated_at`` are presumably absent from the result — confirm
    whether that is intended.

    :returns: A dictionary representation of the model instance suitable for JSON serialization.
    :rtype: dict[str, Any]
    :raises SmarterValueError: If any step of the serialization fails (the original
        exception is logged and chained).
    """
    try:
        payload = model_to_dict(self)
        payload["record_locator"] = self.record_locator
        payload["elapsed_updated"] = self.elapsed_updated
        serialized = json.dumps(payload, cls=SmarterJSONEncoder)
        return json.loads(serialized)
    except Exception as e:
        logger.exception(
            "%s.to_json() - Error serializing model to JSON. model=%s, field_values=%s, exception: %s",
            self.formatted_class_name,
            self.__class__.__name__,
            self.__dict__,
            e,
        )
        raise SmarterValueError(f"Error serializing model to JSON: {e}") from e
[docs]
@classmethod
def get_cached_object(cls, invalidate: Optional[bool] = False, pk: Optional[int] = None) -> Optional[models.Model]:
    """
    Retrieve a model instance by primary key through the ``cache_results`` cache.

    This method is selectively overridden in models that inherit from TimestampedModel
    to provide class-specific function parameters.

    Example usage:

    .. code-block:: python

        # Retrieve by primary key
        instance = MyModel.get_cached_object(pk=1)

    :param invalidate: Whether to invalidate the cached entry for ``pk`` before the lookup.
    :type invalidate: bool, optional
    :param pk: The primary key of the model instance to retrieve. A falsy value
        (``None`` or ``0``) returns ``None`` without touching the cache.
    :type pk: int, optional
    :returns: The model instance if found, otherwise None.
    :rtype: Optional[models.Model]
    :raises NotImplementedError: If called on an abstract model class.
    """
    logger_prefix = formatted_text(__name__ + "." + TimestampedModel.__name__ + ".get_cached_object()")
    verbose_logger.debug("%s.get_cached_object() called with pk: %s, invalidate=%s", logger_prefix, pk, invalidate)

    if cls._meta.abstract:
        raise NotImplementedError(
            "get_cached_object() must be called on a concrete model class, not an abstract base class."
        )

    if not pk:
        # Nothing to look up, and nothing meaningful to invalidate either.
        verbose_logger.debug("%s._get_model_by_pk() called with no pk", logger_prefix)
        return None

    @cache_results(timeout=cls.cache_expiration)
    def _get_model_by_pk(pk: int, class_name: str = cls.__name__) -> Optional[models.Model]:
        try:
            verbose_logger.debug(
                "%s._get_model_by_pk() cache miss for %s pk: %s",
                logger_prefix,
                class_name,
                pk,
            )
            return cls.objects.get(pk=pk)
        except cls.DoesNotExist:
            verbose_logger.debug(
                "%s._get_model_by_pk() no object found for %s pk: %s",
                logger_prefix,
                class_name,
                pk,
            )
            return None

    # Use the exact same positional call shape for invalidation and lookup so both
    # address the same cache entry however cache_results derives its key.
    # NOTE(review): key derivation lives in cache_results — confirm.
    cache_args = (pk, cls.__name__)
    if invalidate:
        _get_model_by_pk.invalidate(*cache_args)
    return _get_model_by_pk(*cache_args)
[docs]
@classmethod
def get_cached_objects(cls, invalidate: Optional[bool] = False) -> QuerySet["TimestampedModel"]:
    """
    Retrieve ``cls.objects.all()`` through the ``cache_results`` cache.

    This method is selectively overridden in models that inherit from
    TimestampedModel to provide class-specific function parameters.

    Example usage:

    .. code-block:: python

        # Retrieve all instances
        instances = MyModel.get_cached_objects()

    :param invalidate: Whether to invalidate the cached entry before the lookup.
    :type invalidate: bool
    :returns: A queryset of all model instances.
    :rtype: QuerySet
    :raises NotImplementedError: If called on an abstract model class.
    """
    logger_prefix = formatted_text(__name__ + "." + cls.__name__ + ".get_cached_objects()")
    verbose_logger.debug("%s.get_cached_objects() called with invalidate=%s", logger_prefix, invalidate)

    if cls._meta.abstract:
        raise NotImplementedError(
            "get_cached_objects() must be called on a concrete model class, not an abstract base class."
        )

    @cache_results(timeout=cls.cache_expiration)
    def _get_all_models(class_name: str = cls.__name__) -> QuerySet["TimestampedModel"]:
        # class_name is unused in the body; it is passed so each concrete model gets
        # its own cache entry. NOTE(review): assumes cache_results keys on call args.
        return cls.objects.all()

    # Pass the class name explicitly, in the same positional shape as the invalidation
    # call, so both address the same per-model cache entry.
    cache_args = (cls.__name__,)
    if invalidate:
        _get_all_models.invalidate(*cache_args)
    return _get_all_models(*cache_args)
def __str__(self):
    """Return ``ClassName(id=<id>)``, using ``None`` when the instance has no ``id`` attribute."""
    class_name = self.__class__.__name__
    object_id = getattr(self, "id", None)
    return f"{class_name}(id={object_id})"
def __repr__(self):
    """Return a debug representation with the class name, id and both timestamps."""
    class_name = self.__class__.__name__
    object_id = getattr(self, "id", None)
    return f"<{class_name} id={object_id} created_at={self.created_at} updated_at={self.updated_at}>"
class MetaDataModel(TimestampedModel):
    """
    Abstract base model that adds SAM metadata fields to a
    TimestampedModel Django ORM model. These are the
    common fields that make up the Pydantic SAM metadata model,
    along with timestamp fields for create/modify tracking.

    **Example Usage:**

    .. code-block:: python

        from smarter.smarter.lib.django.models import MetaDataModel
        from smarter.apps.account.models import User

        class MyModel(MetaDataModel):
            name = models.CharField(max_length=100)
    """

    # pylint: disable=missing-class-docstring
    class Meta:
        abstract = True

    # NOTE(review): the help_text says camelCase while the validator is named
    # validate_snake_case — confirm which convention is intended.
    name = models.CharField(
        max_length=255,
        help_text="Name in camelCase, e.g., 'apiKey', no special characters.",
        validators=[SmarterValidator.validate_snake_case, validate_no_spaces],
    )
    description = models.TextField(
        help_text="A brief description of this resource. Be verbose, but not too verbose.",
        blank=True,
        null=True,
        default="",
    )
    # Checked against SmarterValidator.is_valid_semantic_version() in validate().
    version = models.CharField(
        max_length=255,
        default="1.0.0",
        help_text="Semantic version in the format MAJOR.MINOR.PATCH, e.g., '1.0.0'.",
        blank=True,
        null=True,
    )
    tags = TaggableManager(
        blank=True,
        help_text="Tags for categorizing and organizing this resource.",
    )
    # NOTE(review): the default is an empty list although the help_text describes
    # key-value pairs — confirm the expected JSON shape.
    annotations = models.JSONField(
        default=list,
        blank=True,
        null=True,
        help_text="Key-value pairs for annotating this resource.",
        encoder=SmarterJSONEncoder,
    )
def validate(self):
    """
    Run the parent validation, then check that ``version`` (when set) is a semantic version.

    An empty or ``None`` version is accepted without further checks.

    :raises SmarterValueError: If ``version`` is set and
        ``SmarterValidator.is_valid_semantic_version`` rejects it.
    """
    super().validate()
    # version should be a semantic version: MAJOR.MINOR.PATCH
    if not self.version:
        return
    if SmarterValidator.is_valid_semantic_version(self.version):
        return
    raise SmarterValueError(f"Version '{self.version}' is not a valid semantic version (MAJOR.MINOR.PATCH).")
@cached_property
def tags_list(self) -> list[str]:
    """
    Names of the tags attached to this instance, as a list of strings.

    Two layers of caching are combined: ``@cached_property`` keeps the list on the
    instance after first access, and the inner helper is wrapped in ``@cache_results``
    (called with the class name and primary key) with a timeout of ``cache_expiration``.

    :returns: List of tag names.
    :rtype: list[str]
    """

    # pylint: disable=W0613
    @cache_results(timeout=self.cache_expiration)
    def _get_tags_by_class_and_pk(cls_name: str, pk: int) -> list[str]:
        """
        Read the tag names from ``self.tags``; the arguments are used for logging.
        """
        verbose_logger.debug(
            "%s.tags_list - Retrieving tags for %s with pk=%d from database",
            self.formatted_class_name,
            cls_name,
            pk,
        )
        tag_names = []
        for tag in self.tags.all():
            tag_names.append(tag.name)
        return tag_names

    model_name = self.__class__.__name__
    return _get_tags_by_class_and_pk(model_name, self.pk)
@classmethod
def get_cached_object(
    cls, invalidate: Optional[bool] = False, pk: Optional[int] = None, name: Optional[str] = None
) -> Optional["MetaDataModel"]:
    """
    Retrieve a model instance by name or primary key through the ``cache_results`` cache.

    When ``name`` is given it takes precedence and ``pk`` is ignored; otherwise the
    lookup is delegated to the parent implementation by ``pk``. This method is
    selectively overridden in models that inherit from MetaDataModel to provide
    class-specific function parameters.

    Example usage:

    .. code-block:: python

        # Retrieve by primary key
        instance = MyModel.get_cached_object(pk=1)

        # Retrieve by name
        instance = MyModel.get_cached_object(name="exampleName")

    :param invalidate: Whether to invalidate the cached entry before the lookup.
    :type invalidate: bool, optional
    :param pk: The primary key of the model instance to retrieve.
    :type pk: int, optional
    :param name: The name of the model instance to retrieve.
    :type name: str, optional
    :returns: The model instance if found, otherwise None (also when neither
        ``pk`` nor ``name`` is given).
    :rtype: Optional["MetaDataModel"]
    :raises NotImplementedError: If called on an abstract model class.
    :raises SmarterValueError: If more than one object has the given ``name``.
    """
    logger_prefix = formatted_text(__name__ + "." + MetaDataModel.__name__ + ".get_cached_object()")
    verbose_logger.debug(
        "%s.get_cached_object() called with pk: %s, name: %s, invalidate: %s", logger_prefix, pk, name, invalidate
    )

    if cls._meta.abstract:
        raise NotImplementedError(
            "get_cached_object() must be called on a concrete model class, not an abstract base class."
        )

    if not pk and not name:
        return None

    @cache_results(timeout=cls.cache_expiration)
    def _get_object_by_name(name: str, class_name: str = cls.__name__) -> Optional["MetaDataModel"]:
        try:
            verbose_logger.debug(
                "%s._get_object_by_name() cache miss for %s name: %s",
                logger_prefix,
                class_name,
                name,
            )
            return cls.objects.prefetch_related("tags").get(name=name)
        except cls.DoesNotExist:
            verbose_logger.debug(
                "%s._get_object_by_name() no %s object found for name: %s",
                logger_prefix,
                class_name,
                name,
            )
            return None
        except cls.MultipleObjectsReturned as e:
            # The lookup is ambiguous, so nothing is returned: log and raise.
            logger.error(
                "%s.get_cached_object() - Multiple %s objects found for name '%s'. Raising SmarterValueError.",
                logger_prefix,
                class_name,
                name,
            )
            raise SmarterValueError(f"Multiple {class_name} objects found for name '{name}'.") from e

    if name:
        # Same positional call shape for invalidation and lookup so both address the
        # same cache entry. NOTE(review): key derivation lives in cache_results — confirm.
        cache_args = (name, cls.__name__)
        if invalidate:
            _get_object_by_name.invalidate(*cache_args)
        return _get_object_by_name(*cache_args)

    # pk-only lookup: the parent handles both invalidation and retrieval.
    return super().get_cached_object(invalidate=invalidate, pk=pk)  # type: ignore[return-value]
@classmethod
def get_cached_objects(cls, invalidate: Optional[bool] = False) -> QuerySet["MetaDataModel"]:
    """
    Retrieve all instances of this model by delegating to the parent implementation.

    This override only adds logging and the abstract-class guard; invalidation and
    retrieval are both performed by the parent ``get_cached_objects()``. It is
    selectively overridden in models that inherit from MetaDataModel to provide
    class-specific function parameters.

    Example usage:

    .. code-block:: python

        # Retrieve all instances
        instances = MyModel.get_cached_objects()

    :param invalidate: Whether to invalidate the cache for this retrieval.
    :type invalidate: bool
    :returns: A queryset of all model instances.
    :rtype: QuerySet
    :raises NotImplementedError: If called on an abstract model class.
    """
    logger_prefix = formatted_text(__name__ + "." + cls.__name__ + ".get_cached_objects()")
    verbose_logger.debug(
        "%s.get_cached_objects() called for %s with invalidate=%s", logger_prefix, cls.__name__, invalidate
    )

    if cls._meta.abstract:
        raise NotImplementedError(
            "get_cached_objects() must be called on a concrete model class, not an abstract base class."
        )

    return super().get_cached_objects(invalidate=invalidate)  # type: ignore