# Source code for autorag.vectordb.base
from abc import abstractmethod
from typing import List, Tuple, Union
from llama_index.embeddings.openai import OpenAIEmbedding
from autorag.utils.util import openai_truncate_by_token
from autorag.embedding.base import EmbeddingModel
[docs]
class BaseVectorStore:
    """Common interface and shared setup for vector store backends.

    The constructor validates the requested similarity metric and builds the
    embedding model; the remaining methods are placeholders that concrete
    backends are expected to override.

    NOTE(review): the methods are marked with ``@abstractmethod`` but this
    class does not derive from ``abc.ABC``, so Python does not block
    instantiation of a subclass that leaves one of them unimplemented.
    """

    # Metric names accepted by ``__init__``; subclasses may narrow this list.
    support_similarity_metrics = ["l2", "ip", "cosine"]

    def __init__(
        self,
        embedding_model: Union[str, List[dict]],
        similarity_metric: str = "cosine",
        embedding_batch: int = 100,
    ):
        """Validate the metric and set up the embedding model.

        :param embedding_model: Specification handed to
            ``EmbeddingModel.load``; the object it returns is called with no
            arguments to obtain the embedding instance.
        :param similarity_metric: One of ``support_similarity_metrics``.
        :param embedding_batch: Batch size stored on the instance and copied
            to the embedding instance's ``embed_batch_size``.
        :raises AssertionError: If ``similarity_metric`` is not supported.
        """
        # Validate before loading the embedding model so that a bad argument
        # fails fast instead of after model construction. Raised explicitly
        # (rather than with an ``assert`` statement) so the check is not
        # stripped under ``python -O``; the exception type is unchanged.
        if similarity_metric not in self.support_similarity_metrics:
            raise AssertionError(
                f"search method {similarity_metric} is not supported"
            )
        self.similarity_metric = similarity_metric

        self.embedding = EmbeddingModel.load(embedding_model)()
        self.embedding_batch = embedding_batch
        self.embedding.embed_batch_size = embedding_batch

    @abstractmethod
    async def add(
        self,
        ids: List[str],
        texts: List[str],
    ):
        """
        Add the texts to the Vector DB under the given ids.

        Presumably the texts are embedded with ``self.embedding`` by the
        implementing backend -- confirm against the concrete subclasses.
        """
        pass

    @abstractmethod
    def add_embedding(self, ids: List[str], embeddings: List[List[float]]):
        """
        Add the embeddings to the Vector DB.
        """
        pass

    @abstractmethod
    async def query(
        self, queries: List[str], top_k: int, **kwargs
    ) -> Tuple[List[List[str]], List[List[float]]]:
        """
        Query the Vector DB with the given queries.

        Returns a pair of nested lists; presumably the matched ids and their
        scores per query, limited to ``top_k`` -- confirm against the
        concrete subclasses.
        """
        pass

    @abstractmethod
    async def fetch(self, ids: List[str]) -> List[List[float]]:
        """
        Fetch the embeddings of the ids.
        """
        pass

    @abstractmethod
    async def is_exist(self, ids: List[str]) -> List[bool]:
        """
        Check if the ids exist in the Vector DB.
        """
        pass

    @abstractmethod
    async def delete(self, ids: List[str]):
        """
        Delete the entries with the given ids from the Vector DB.
        """
        pass