"""
Base class for vector store backends.
This class defines the interface that all
vectorstore backends must implement. It includes methods for creating, deleting,
upserting, querying, and getting stats for vector databases. Each backend should
inherit from this class and provide concrete implementations for these methods
based on the specific vector store being used (e.g., Pinecone, Weaviate, etc.).
"""
import logging
from abc import ABC, abstractmethod
from typing import Any, Optional
from langchain_core.documents import Document
from langchain_core.embeddings.embeddings import Embeddings
from langchain_core.vectorstores import VectorStore
from smarter.apps.vectorstore.models import VectorestoreMeta
from smarter.apps.vectorstore.signals import connected
from smarter.common.exceptions import SmarterException
from smarter.common.mixins import SmarterHelperMixin
from smarter.lib.django import waffle
from smarter.lib.django.waffle import SmarterWaffleSwitches
from smarter.lib.logging import WaffleSwitchedLoggerWrapper
# pylint: disable=unused-argument
[docs]
def should_log(level):
"""Check if logging should be done based on the waffle switch."""
return waffle.switch_is_active(SmarterWaffleSwitches.VECTORSTORE_LOGGING)
base_logger = logging.getLogger(__name__)
logger = WaffleSwitchedLoggerWrapper(base_logger, should_log)
[docs]
class VectorStoreBackendError(SmarterException):
"""
Exception raised when there is an error with the vector store backend.
"""
[docs]
class VectorStoreBackendConnectionError(SmarterException):
"""
Exception raised when there is an error with the vector store backend connection.
"""
[docs]
class VectorStoreBackendConnection(SmarterHelperMixin):
"""
Represents a connection to a vector store backend.
"""
_connection: Optional[object]
@property
def ready(self) -> bool:
"""
Check if the connection is ready for operations.
"""
return super().ready
[docs]
def connect(self):
"""
Establish the connection to the vector store backend.
"""
connected.send(sender=self.__class__, instance=self, connection=self._connection)
[docs]
class SmarterVectorstoreBackend(ABC, SmarterHelperMixin):
"""
Abstract base class for vector store backends.
This class defines the service interface that all vector store backends must implement.
All concrete backends (e.g., Pinecone, Weaviate, etc.) should inherit from this
class and provide implementations for all abstract methods. The backend is responsible
for managing the connection, storing and retrieving vectors, and handling database operations.
Parameters
----------
db : VectorestoreMeta
The vector database instance to use.
embeddings : Optional[Embeddings], optional
The embeddings model to use for vectorization (default is None).
vector_store : Optional[VectorStore], optional
The vector store object (default is None).
Methods
-------
add_documents(documents, embeddings)
Add documents with their corresponding embeddings to the vector store.
initialize()
Initialize the backend, setting up any necessary connections or configurations.
create()
Provision a new vector database in the backend.
delete()
Delete the vector database from the backend.
upsert(vectors)
Upsert vectors into the vector database in the backend.
query(query_vector, top_k=10)
Query the vector database in the backend.
connect()
Establish a connection to the vector database in the backend.
disconnect()
Disconnect from the vector database in the backend.
load(embeddings)
Load vectors into the vector database from a list of embeddings.
Properties
----------
index_stats : str
Get statistics about the vector database in the backend.
vector_store : object
Get the vector store object for the backend.
embeddings : Embeddings
Get the embeddings model.
connection : VectorStoreBackendConnection
Get or establish the connection to the vector database in the backend.
is_connected : bool
Check if there is an active connection to the vector database in the backend.
ready : bool
Check if the backend is ready for operations.
"""
# Internal state variables for lazy initialization
_connection: Optional[VectorStoreBackendConnection] = None
_embeddings: Optional[Embeddings] = None
_vector_store: Optional[VectorStore] = None
db: VectorestoreMeta
[docs]
def __init__(
self,
*args,
db: VectorestoreMeta,
embeddings: Optional[Embeddings] = None,
vector_store: Optional[VectorStore] = None,
**kwargs,
):
SmarterHelperMixin.__init__(self)
self.db = db
self._connection = None
self._embeddings = embeddings
self._vector_store = vector_store
logger.debug("%s.__init__() Initializing backend for database: %s", self.formatted_class_name, db)
@property
def index_stats(self) -> str:
"""
Get statistics about the vector database in the backend.
"""
raise NotImplementedError("Index stats method not implemented for this backend")
@property
def vector_store(self) -> object:
"""
Get the vector store object for the backend.
"""
if self._vector_store is None:
raise NotImplementedError("Vector store property not implemented for this backend")
return self._vector_store
@property
def embeddings(self) -> Embeddings:
"""Get the embeddings."""
if self._embeddings is None:
raise NotImplementedError("Embeddings property not implemented for this backend")
return self._embeddings
@property
def connection(self) -> VectorStoreBackendConnection:
"""
Get the connection to the vector database in the backend, establishing
it if it doesn't already exist.
"""
if self._connection is None:
self._connection = self.connect()
return self._connection
@property
def is_connected(self) -> bool:
"""
Check if there is an active connection to the vector database in the backend.
"""
return self._connection is not None and self._connection.ready
@property
def ready(self) -> bool:
"""
Check if the backend is ready for operations.
"""
return super().ready and self.is_connected
###########################################################################
# Abstract methods that must be implemented by all backends
###########################################################################
[docs]
@abstractmethod
def add_documents(self, documents: list[Document], embeddings: list[Any]) -> bool:
"""
Add documents with their corresponding embeddings to the vector store.
"""
raise NotImplementedError("Add documents method not implemented for this backend")
[docs]
@abstractmethod
def connect(self) -> VectorStoreBackendConnection:
"""
Establish a connection to the vector database in the backend.
"""
raise NotImplementedError("Connect method not implemented for this backend")
[docs]
@abstractmethod
def create(self):
"""
Provision a new vector database in the backend.
"""
raise NotImplementedError("Create method not implemented for this backend")
[docs]
@abstractmethod
def delete(self):
"""
Delete the vector database from the backend.
"""
raise NotImplementedError("Delete method not implemented for this backend")
[docs]
@abstractmethod
def disconnect(self) -> None:
"""
Disconnect from the vector database in the backend.
"""
raise NotImplementedError("Disconnect method not implemented for this backend")
[docs]
@abstractmethod
def initialize(self):
"""
Initialize the backend, setting up any necessary connections or configurations.
"""
raise NotImplementedError("Initialize method not implemented for this backend")
[docs]
@abstractmethod
def query(self, query_vector: Any, top_k: int = 10):
"""
Query the vector database in the backend.
"""
raise NotImplementedError("Query method not implemented for this backend")