Module zep_python.document

Expand source code
from zep_python.document.collections import DocumentCollection
from zep_python.document.models import Document

# Names re-exported as the public API of the ``zep_python.document`` package.
__all__ = [
    "Document",
    "DocumentCollection",
]

Sub-modules

zep_python.document.client
zep_python.document.collections
zep_python.document.models

Classes

class Document (**data: Any)

Represents a document base.

Attributes

uuid : Optional[str]
The unique identifier of the document.
created_at : Optional[datetime]
The timestamp of when the document was created.
updated_at : Optional[datetime]
The timestamp of when the document was last updated.
document_id : Optional[str]
The unique identifier of the document (name or some id).
content : str
The content of the document.
metadata : Optional[Dict[str, Any]]
Any additional metadata associated with the document.
is_embedded : Optional[bool]
Whether the document has an embedding.
embedding : Optional[List[float]]
The embedding of the document.
score : Optional[float]
The normed score of the search result. Available only when the document is returned as part of a query result.

Create a new model by parsing and validating input data from keyword arguments.

Raises pydantic_core.ValidationError if the input data cannot be validated to form a valid model.

self is explicitly positional-only to allow self as a field name.

Expand source code
class Document(BaseModel):
    """
    Represents a document base.

    Only ``content`` is required; every other field has a default.

    Attributes
    ----------
    uuid : Optional[str]
        The unique identifier of the document.
    created_at : Optional[datetime]
        The timestamp of when the document was created.
    updated_at : Optional[datetime]
        The timestamp of when the document was last updated.
    document_id : Optional[str]
        The unique identifier of the document (name or some id).
        At most 100 characters long.
    content : str
        The content of the document. Required; must contain at least
        one character.
    metadata : Optional[Dict[str, Any]]
        Any additional metadata associated with the document.
        Defaults to a new empty dict.
    is_embedded : Optional[bool]
        Whether the document has an embedding.
    embedding : Optional[List[float]]
        The embedding of the document.
    score : Optional[float]
        The normed score of the search result. Available only
        when the document is returned as part of a query result.
    """

    # Identification and timestamps; all default to None when not supplied.
    uuid: Optional[str] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    # Optional caller-chosen identifier, capped at 100 characters.
    document_id: Optional[str] = Field(default=None, max_length=100)
    # The only required field (``...``); empty strings are rejected.
    content: str = Field(..., min_length=1)
    # default_factory gives every instance its own dict instead of a shared one.
    metadata: Optional[Dict[str, Any]] = Field(default_factory=dict)
    # Embedding state and search score; all default to None when not supplied.
    is_embedded: Optional[bool] = None
    embedding: Optional[List[float]] = None
    score: Optional[float] = None

    def to_dict(self) -> Dict[str, Any]:
        """
        Returns a dictionary representation of the document.

        Returns
        -------
        Dict[str, Any]
            A dictionary containing the attributes of the document,
            as produced by ``model_dump()``.
        """
        return self.model_dump()

Ancestors

  • pydantic.main.BaseModel

Class variables

var content : str
var created_at : Optional[datetime.datetime]
var document_id : Optional[str]
var embedding : Optional[List[float]]
var is_embedded : Optional[bool]
var metadata : Optional[Dict[str, Any]]
var model_computed_fields
var model_config
var model_fields
var score : Optional[float]
var updated_at : Optional[datetime.datetime]
var uuid : Optional[str]

Methods

def to_dict(self) ‑> Dict[str, Any]

Returns a dictionary representation of the document.

Returns

Dict[str, Any]
A dictionary containing the attributes of the document.
Expand source code
def to_dict(self) -> Dict[str, Any]:
    """
    Serialize the document into a plain dictionary.

    Returns
    -------
    Dict[str, Any]
        The attributes of the document, as produced by ``model_dump()``.
    """
    dumped = self.model_dump()
    return dumped
class DocumentCollection (aclient: Optional[httpx.AsyncClient] = None, client: Optional[httpx.Client] = None, **kwargs: Any)

Represents a collection of documents.

Attributes

uuid : str
The unique identifier of the collection.
created_at : Optional[datetime]
The timestamp of when the collection was created.
updated_at : Optional[datetime]
The timestamp of when the collection was last updated.
name : str
The unique name of the collection.
description : Optional[str]
The description of the collection.
metadata : Optional[Dict[str, Any]]
Any additional metadata associated with the collection.
embedding_dimensions : int
The dimensions of the embedding model.
is_auto_embedded : bool
Flag to indicate whether the documents in the collection should be automatically embedded by Zep. (Default: True)
is_indexed : bool
Flag indicating whether an index has been created for this collection.

Create a new model by parsing and validating input data from keyword arguments.

Raises pydantic_core.ValidationError if the input data cannot be validated to form a valid model.

self is explicitly positional-only to allow self as a field name.

Expand source code
class DocumentCollection(DocumentCollectionModel):
    __doc__ = DocumentCollectionModel.__doc__ or ""

    _client: Optional[httpx.Client] = PrivateAttr(default=None)
    _aclient: Optional[httpx.AsyncClient] = PrivateAttr(default=None)

    def __init__(
        self,
        aclient: Optional[httpx.AsyncClient] = None,
        client: Optional[httpx.Client] = None,
        **kwargs: Any,
    ) -> None:
        """
        Build the collection model and attach the HTTP clients.

        Parameters
        ----------
        aclient : Optional[httpx.AsyncClient]
            Client used by the asynchronous (``a``-prefixed) methods.
        client : Optional[httpx.Client]
            Client used by the synchronous methods.
        **kwargs : Any
            Field values forwarded to the parent model's constructor.
        """
        # Populate the model fields first, then remember both clients.
        super().__init__(**kwargs)
        self._client, self._aclient = client, aclient

    @property
    def status(self) -> str:
        """
        Report whether the collection is fully embedded.

        Returns
        -------
        str
            `ready`: the collection holds at least one document and the
            number of embedded documents equals the number of documents.

            `pending`: any other case, including an empty collection.
        """
        total = self.document_count
        # An empty (or unknown) document count is never considered ready.
        if not total:
            return "pending"
        if self.document_embedded_count == total:
            return "ready"
        return "pending"

    async def aadd_documents(
        self,
        documents: List[Document],
        batch_size: int = DEFAULT_BATCH_SIZE,
    ) -> List[str]:
        """
        Asynchronously create documents.

        The documents are uploaded in batches produced by
        ``generate_batches``; one POST request is issued per batch.

        Parameters
        ----------
        documents : List[Document]
            A list of Document objects representing the documents to create.
        batch_size : int, optional
            The number of documents to upload in each batch. Defaults to
            ``DEFAULT_BATCH_SIZE``.

        Returns
        -------
        List[str]
            The UUIDs of the created documents.

        Raises
        ------
        ValueError
            If no async client is attached to the collection, or if
            ``documents`` is None.
        APIError
            If the API response format is unexpected.
        """
        if not self._aclient:
            raise ValueError(
                "Can only add documents once a collection has been created"
            )

        if documents is None:
            raise ValueError("document list must be provided")

        # The endpoint is the same for every batch, so build it only once.
        url = f"/collections/{urllib.parse.quote_plus(self.name)}/documents"

        uuids: List[str] = []
        for batch in generate_batches(documents, batch_size):
            response = await self._aclient.post(url, json=batch)

            handle_response(response)

            # Each response body is the list of UUIDs created for that batch.
            uuids.extend(response.json())

        return uuids

    def add_documents(
        self,
        documents: List[Document],
        batch_size: int = DEFAULT_BATCH_SIZE,
    ) -> List[str]:
        """
        Create documents.

        The documents are uploaded in batches produced by
        ``generate_batches``; one POST request is issued per batch.

        Parameters
        ----------
        documents : List[Document]
            A list of Document objects representing the documents to create.
        batch_size : int, optional
            The number of documents to upload in each batch. Defaults to
            ``DEFAULT_BATCH_SIZE``.

        Returns
        -------
        List[str]
            The UUIDs of the created documents.

        Raises
        ------
        ValueError
            If no client is attached to the collection, or if
            ``documents`` is None.
        APIError
            If the API response format is unexpected.
        """
        if not self._client:
            raise ValueError(
                "Can only add documents once a collection has been created"
            )

        if documents is None:
            raise ValueError("document list must be provided")

        # The endpoint is the same for every batch, so build it only once.
        url = f"/collections/{urllib.parse.quote_plus(self.name)}/documents"

        uuids: List[str] = []
        for batch in generate_batches(documents, batch_size):
            response = self._client.post(url, json=batch)

            handle_response(response)

            # Each response body is the list of UUIDs created for that batch.
            uuids.extend(response.json())

        return uuids

    async def aupdate_document(
        self,
        uuid: str,
        document_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> None:
        """
        Asynchronously update document by UUID.

        Parameters
        ----------
        uuid : str
            The UUID of the document to update.
        document_id : Optional[str]
            The document_id of the document.
        metadata : Optional[Dict[str, Any]]
            The metadata of the document.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If no async client is attached to the collection, if ``uuid``
            is None or blank, or if neither ``document_id`` nor
            ``metadata`` is given.

        NotFoundError
            If the document is not found.

        APIError
            If the API response format is unexpected.
        """
        if not self._aclient:
            raise ValueError(
                "Can only update documents once a collection has been retrieved or"
                " created"
            )

        # Reject blank UUIDs too, matching the get/delete document methods.
        if uuid is None or uuid.strip() == "":
            raise ValueError("document uuid must be provided")

        if document_id is None and metadata is None:
            raise ValueError("document_id or metadata must be provided")

        payload = filter_dict({"document_id": document_id, "metadata": metadata})

        collection = urllib.parse.quote_plus(self.name)
        response = await self._aclient.patch(
            f"/collections/{collection}/documents/uuid/{uuid}",
            json=payload,
        )

        handle_response(response)

    def update_document(
        self,
        uuid: str,
        document_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> None:
        """
        Update document by UUID.

        Parameters
        ----------
        uuid : str
            The UUID of the document to update.
        document_id : Optional[str]
            The document_id of the document.
        metadata : Optional[Dict[str, Any]]
            The metadata of the document.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If no client is attached to the collection, if ``uuid`` is
            None or blank, or if neither ``document_id`` nor ``metadata``
            is given.

        NotFoundError
            If the document is not found.

        APIError
            If the API response format is unexpected.
        """
        if not self._client:
            raise ValueError(
                "Can only update documents once a collection has been retrieved or"
                " created"
            )

        # Reject blank UUIDs too, matching the get/delete document methods.
        if uuid is None or uuid.strip() == "":
            raise ValueError("document uuid must be provided")

        if document_id is None and metadata is None:
            raise ValueError("document_id or metadata must be provided")

        payload = filter_dict({"document_id": document_id, "metadata": metadata})

        collection = urllib.parse.quote_plus(self.name)
        response = self._client.patch(
            f"/collections/{collection}/documents/uuid/{uuid}",
            json=payload,
        )

        handle_response(response)

    async def adelete_document(self, uuid: str) -> None:
        """
        Asynchronously remove a single document from the collection.

        Parameters
        ----------
        uuid: str
            The UUID of the document to delete.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If no async client is attached, or ``uuid`` is None or blank.

        NotFoundError
            If the document is not found.

        APIError
            If the API response format is unexpected.
        """
        if not self._aclient:
            raise ValueError(
                "Can only delete a document once a collection has been retrieved"
            )
        if uuid is None or not uuid.strip():
            raise ValueError("document uuid must be provided")

        collection = urllib.parse.quote_plus(self.name)
        url = f"/collections/{collection}/documents/uuid/{uuid}"

        resp = await self._aclient.delete(url)
        handle_response(resp)

    def delete_document(self, uuid: str) -> None:
        """
        Remove a single document from the collection.

        Parameters
        ----------
        uuid: str
            The UUID of the document to delete.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If no client is attached, or ``uuid`` is None or blank.

        NotFoundError
            If the document is not found.

        APIError
            If the API response format is unexpected.
        """
        if not self._client:
            raise ValueError(
                "Can only delete a document once a collection has been retrieved"
            )
        if uuid is None or not uuid.strip():
            raise ValueError("document uuid must be provided")

        collection = urllib.parse.quote_plus(self.name)
        url = f"/collections/{collection}/documents/uuid/{uuid}"

        resp = self._client.delete(url)
        handle_response(resp)

    async def aget_document(self, uuid: str) -> Document:
        """
        Asynchronously fetch a single document by its UUID.

        Parameters
        ----------
        uuid: str
            The UUID of the document to get.

        Returns
        -------
        Document
            The retrieved document, built from the JSON response body.

        Raises
        ------
        ValueError
            If no async client is attached, or ``uuid`` is None or blank.

        NotFoundError
            If the document is not found.

        APIError
            If the API response format is unexpected.
        """
        if not self._aclient:
            raise ValueError(
                "Can only get a document once a collection has been retrieved"
            )
        if uuid is None or not uuid.strip():
            raise ValueError("document uuid must be provided")

        collection = urllib.parse.quote_plus(self.name)
        url = f"/collections/{collection}/documents/uuid/{uuid}"

        resp = await self._aclient.get(url)
        handle_response(resp)

        fields = resp.json()
        return Document(**fields)

    def get_document(self, uuid: str) -> Document:
        """
        Fetch a single document by its UUID.

        Parameters
        ----------
        uuid: str
            The UUID of the document to get.

        Returns
        -------
        Document
            The retrieved document, built from the JSON response body.

        Raises
        ------
        ValueError
            If no client is attached, or ``uuid`` is None or blank.

        NotFoundError
            If the document is not found.

        APIError
            If the API response format is unexpected.
        """
        if not self._client:
            raise ValueError(
                "Can only get a document once a collection has been retrieved"
            )
        if uuid is None or not uuid.strip():
            raise ValueError("document uuid must be provided")

        collection = urllib.parse.quote_plus(self.name)
        url = f"/collections/{collection}/documents/uuid/{uuid}"

        resp = self._client.get(url)
        handle_response(resp)

        fields = resp.json()
        return Document(**fields)

    async def aget_documents(self, uuids: List[str]) -> List[Document]:
        """
        Asynchronously fetch several documents in a single request.

        A warning is emitted when more than ``LARGE_BATCH_WARNING_LIMIT``
        UUIDs are requested; the request is still sent.

        Parameters
        ----------
        uuids: List[str]
            The list of document uuids to get.

        Returns
        -------
        List[Document]
            The list of document objects.

        Raises
        ------
        ValueError
            If no async client is attached, or ``uuids`` is None or empty.

        APIError
            If the API response format is unexpected.
        """
        if not self._aclient:
            raise ValueError(
                "Can only get documents once a collection has been retrieved"
            )
        # A falsy value covers both None and an empty list.
        if not uuids:
            raise ValueError("document uuids must be provided")
        if len(uuids) > LARGE_BATCH_WARNING_LIMIT:
            warnings.warn(LARGE_BATCH_WARNING, stacklevel=2)

        collection = urllib.parse.quote_plus(self.name)
        resp = await self._aclient.post(
            f"/collections/{collection}/documents/list/get",
            json={"uuids": uuids},
        )
        handle_response(resp)

        return [Document(**fields) for fields in resp.json()]

    def get_documents(self, uuids: List[str]) -> List[Document]:
        """
        Fetch several documents in a single request.

        A warning is emitted when more than ``LARGE_BATCH_WARNING_LIMIT``
        UUIDs are requested; the request is still sent.

        Parameters
        ----------
        uuids: List[str]
            The list of document uuids to get.

        Returns
        -------
        List[Document]
            The list of document objects.

        Raises
        ------
        ValueError
            If no client is attached, or ``uuids`` is None or empty.

        APIError
            If the API response format is unexpected.
        """
        if not self._client:
            raise ValueError(
                "Can only get documents once a collection has been retrieved"
            )
        # A falsy value covers both None and an empty list.
        if not uuids:
            raise ValueError("document uuids must be provided")
        if len(uuids) > LARGE_BATCH_WARNING_LIMIT:
            warnings.warn(LARGE_BATCH_WARNING, stacklevel=2)

        collection = urllib.parse.quote_plus(self.name)
        resp = self._client.post(
            f"/collections/{collection}/documents/list/get",
            json={"uuids": uuids},
        )
        handle_response(resp)

        return [Document(**fields) for fields in resp.json()]

    async def asearch_return_query_vector(
        self,
        text: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        limit: Optional[int] = None,
        search_type: Optional[str] = None,
        mmr_lambda: Optional[float] = None,
    ) -> Tuple[List[Document], List[float]]:
        """
        Asynchronously search the collection and also return the query vector.

        One of text or metadata must be provided.

        Parameters
        ----------
        text : Optional[str], optional
            The search text.
        metadata : Optional[Dict[str, Any]], optional
            Document metadata to filter on.
        limit : Optional[int], optional
            Limit the number of returned documents. Only sent to the API
            when it is a positive number.
        search_type : Optional[str], optional
            The type of search to perform. Defaults to "similarity".
        mmr_lambda : Optional[float], optional
            The lambda parameter for the MMR Reranking Algorithm.

        Returns
        -------
        Tuple[List[Document], List[float]]
            The matching documents and the ``query_vector`` value of the
            response. Both are empty lists when the API answers with 404.

        Raises
        ------
        ValueError
            If no async client is attached, if neither ``text`` nor
            ``metadata`` is given, or if ``text`` is not a string.
        APIError
            If the API response format is unexpected or there's an error from the API.
        """
        if not self._aclient:
            raise ValueError(
                "Can only search documents once a collection has been retrieved"
            )

        if text is None and metadata is None:
            raise ValueError("One of text or metadata must be provided.")

        if text is not None and not isinstance(text, str):
            raise ValueError("Text must be a string.")

        search_type_value = SearchType(search_type or "similarity")

        payload = DocumentSearchPayload(
            text=text,
            metadata=metadata,
            search_type=search_type_value,
            mmr_lambda=mmr_lambda,
        )

        url = f"/collections/{urllib.parse.quote_plus(self.name)}/search"
        params = {"limit": limit} if limit is not None and limit > 0 else {}

        response = await self._aclient.post(
            url,
            params=params,
            json=payload.model_dump(exclude_none=True, exclude_unset=True),
        )

        # If the collection is not found, return an empty list
        if response.status_code == 404:
            return [], []

        # Otherwise, handle the response for other errors
        handle_response(response)

        # Decode the body once and reuse it for both parts of the result.
        body = response.json()
        return (
            [Document(**document) for document in body["results"]],
            body["query_vector"],
        )

    async def asearch(
        self,
        text: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        limit: Optional[int] = None,
        search_type: Optional[str] = None,
        mmr_lambda: Optional[float] = None,
    ) -> List[Document]:
        """
        Asynchronously search the documents of this collection.

        One of text or metadata must be provided. This delegates to
        ``asearch_return_query_vector`` and discards the query vector.

        Returns an empty list if no documents are found.

        Parameters
        ----------
        text : Optional[str], optional
            The search text.
        metadata : Optional[Dict[str, Any]], optional
            Document metadata to filter on.
        limit : Optional[int], optional
            Limit the number of returned documents.
        search_type : Optional[str], optional
            The type of search to perform. Defaults to "similarity".
            Must be one of "similarity" or "mmr".
        mmr_lambda : Optional[float], optional
            The lambda parameter for the MMR Reranking Algorithm.

        Returns
        -------
        List[Document]
            The list of documents that match the search criteria.

        Raises
        ------
        APIError
            If the API response format is unexpected or there's an error from the API.
        """
        documents, _query_vector = await self.asearch_return_query_vector(
            text=text,
            metadata=metadata,
            limit=limit,
            search_type=search_type,
            mmr_lambda=mmr_lambda,
        )
        return documents

    def search_return_query_vector(
        self,
        text: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        limit: Optional[int] = None,
        search_type: Optional[str] = None,
        mmr_lambda: Optional[float] = None,
    ) -> Tuple[List[Document], List[float]]:
        """
        Search the collection and also return the query vector.

        One of text or metadata must be provided.

        Parameters
        ----------
        text : Optional[str], optional
            The search text.
        metadata : Optional[Dict[str, Any]], optional
            Document metadata to filter on.
        limit : Optional[int], optional
            Limit the number of returned documents. Only sent to the API
            when it is a positive number.
        search_type : Optional[str], optional
            The type of search to perform. Defaults to "similarity".
        mmr_lambda : Optional[float], optional
            The lambda parameter for the MMR Reranking Algorithm.

        Returns
        -------
        Tuple[List[Document], List[float]]
            The matching documents and the ``query_vector`` value of the
            response. Both are empty lists when the API answers with 404.

        Raises
        ------
        ValueError
            If no client is attached, if neither ``text`` nor ``metadata``
            is given, or if ``text`` is not a string.
        APIError
            If the API response format is unexpected or there's an error from the API.
        """
        if not self._client:
            raise ValueError(
                "Can only search documents once a collection has been retrieved"
            )

        if text is None and metadata is None:
            raise ValueError("One of text or metadata must be provided.")

        if text is not None and not isinstance(text, str):
            raise ValueError("Text must be a string.")

        search_type_value = SearchType(search_type or "similarity")

        payload = DocumentSearchPayload(
            text=text,
            metadata=metadata,
            search_type=search_type_value,
            mmr_lambda=mmr_lambda,
        )

        url = f"/collections/{urllib.parse.quote_plus(self.name)}/search"
        params = {"limit": limit} if limit is not None and limit > 0 else {}

        response = self._client.post(
            url,
            params=params,
            json=payload.model_dump(exclude_none=True, exclude_unset=True),
        )

        # If the collection is not found, return an empty list
        if response.status_code == 404:
            return [], []

        # Otherwise, handle the response for other errors
        handle_response(response)

        # Decode the body once and reuse it for both parts of the result.
        body = response.json()
        return (
            [Document(**document) for document in body["results"]],
            body["query_vector"],
        )

    def search(
        self,
        text: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        limit: Optional[int] = None,
        search_type: Optional[str] = None,
        mmr_lambda: Optional[float] = None,
    ) -> List[Document]:
        """
        Search the documents of this collection.

        One of text or metadata must be provided. This delegates to
        ``search_return_query_vector`` and discards the query vector.

        Returns an empty list if no documents are found.

        Parameters
        ----------
        text : Optional[str], optional
            The search text.
        metadata : Optional[Dict[str, Any]], optional
            Document metadata to filter on.
        limit : Optional[int], optional
            Limit the number of returned documents.
        search_type : Optional[str], optional
            The type of search to perform. Defaults to "similarity".
            Must be one of "similarity" or "mmr".
        mmr_lambda : Optional[float], optional
            The lambda parameter for the MMR Reranking Algorithm.

        Returns
        -------
        List[Document]
            The list of documents that match the search criteria.

        Raises
        ------
        APIError
            If the API response format is unexpected or there's an error from the API.
        """
        documents, _query_vector = self.search_return_query_vector(
            text=text,
            metadata=metadata,
            limit=limit,
            search_type=search_type,
            mmr_lambda=mmr_lambda,
        )
        return documents

Ancestors

Class variables

var model_computed_fields
var model_config
var model_fields

Instance variables

var status : str

Get the status of the collection.

Returns

str

The status of the collection.

ready: All documents have been embedded and the collection is ready for search.

pending: The collection is still processing.

Expand source code
@property
def status(self) -> str:
    """
    Report whether the collection is fully embedded.

    Returns
    -------
    str
        `ready`: the collection holds at least one document and the
        number of embedded documents equals the number of documents.

        `pending`: any other case, including an empty collection.
    """
    total = self.document_count
    # An empty (or unknown) document count is never considered ready.
    if not total:
        return "pending"
    if self.document_embedded_count == total:
        return "ready"
    return "pending"

Methods

async def aadd_documents(self, documents: List[Document], batch_size: int = 1000) ‑> List[str]

Asynchronously create documents.

Parameters

documents : List[Document]
A list of Document objects representing the documents to create.
batch_size : int, optional
The number of documents to upload in each batch. Defaults to 1000, as shown in the signature above.

Returns

List[str]
The UUIDs of the created documents.

Raises

APIError
If the API response format is unexpected.
Expand source code
async def aadd_documents(
    self,
    documents: List[Document],
    batch_size: int = DEFAULT_BATCH_SIZE,
) -> List[str]:
    """
    Asynchronously create documents.

    The documents are uploaded in batches produced by
    ``generate_batches``; one POST request is issued per batch.

    Parameters
    ----------
    documents : List[Document]
        A list of Document objects representing the documents to create.
    batch_size : int, optional
        The number of documents to upload in each batch. Defaults to
        ``DEFAULT_BATCH_SIZE``.

    Returns
    -------
    List[str]
        The UUIDs of the created documents.

    Raises
    ------
    ValueError
        If no async client is attached to the collection, or if
        ``documents`` is None.
    APIError
        If the API response format is unexpected.
    """
    if not self._aclient:
        raise ValueError(
            "Can only add documents once a collection has been created"
        )

    if documents is None:
        raise ValueError("document list must be provided")

    # The endpoint is the same for every batch, so build it only once.
    url = f"/collections/{urllib.parse.quote_plus(self.name)}/documents"

    uuids: List[str] = []
    for batch in generate_batches(documents, batch_size):
        response = await self._aclient.post(url, json=batch)

        handle_response(response)

        # Each response body is the list of UUIDs created for that batch.
        uuids.extend(response.json())

    return uuids
def add_documents(self, documents: List[Document], batch_size: int = 1000) ‑> List[str]

Create documents.

Parameters

documents : List[Document]
A list of Document objects representing the documents to create.
batch_size : int, optional
The number of documents to upload in each batch. Defaults to 1000, as shown in the signature above.

Returns

List[str]
The UUIDs of the created documents.

Raises

APIError
If the API response format is unexpected.
Expand source code
def add_documents(
    self,
    documents: List[Document],
    batch_size: int = DEFAULT_BATCH_SIZE,
) -> List[str]:
    """
    Create documents.

    The documents are uploaded in batches produced by
    ``generate_batches``; one POST request is issued per batch.

    Parameters
    ----------
    documents : List[Document]
        A list of Document objects representing the documents to create.
    batch_size : int, optional
        The number of documents to upload in each batch. Defaults to
        ``DEFAULT_BATCH_SIZE``.

    Returns
    -------
    List[str]
        The UUIDs of the created documents.

    Raises
    ------
    ValueError
        If no client is attached to the collection, or if
        ``documents`` is None.
    APIError
        If the API response format is unexpected.
    """
    if not self._client:
        raise ValueError(
            "Can only add documents once a collection has been created"
        )

    if documents is None:
        raise ValueError("document list must be provided")

    # The endpoint is the same for every batch, so build it only once.
    url = f"/collections/{urllib.parse.quote_plus(self.name)}/documents"

    uuids: List[str] = []
    for batch in generate_batches(documents, batch_size):
        response = self._client.post(url, json=batch)

        handle_response(response)

        # Each response body is the list of UUIDs created for that batch.
        uuids.extend(response.json())

    return uuids
async def adelete_document(self, uuid: str) ‑> None

Asynchronously delete document.

Parameters

uuid : str
The uuid of the document to be deleted.

Returns

None
 

Raises

NotFoundError
If the document is not found.
APIError
If the API response format is unexpected.
Expand source code
async def adelete_document(self, uuid: str) -> None:
    """
    Asynchronously remove a single document from the collection.

    Parameters
    ----------
    uuid: str
        The UUID of the document to delete.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If no async client is attached, or ``uuid`` is None or blank.

    NotFoundError
        If the document is not found.

    APIError
        If the API response format is unexpected.
    """
    if not self._aclient:
        raise ValueError(
            "Can only delete a document once a collection has been retrieved"
        )
    if uuid is None or not uuid.strip():
        raise ValueError("document uuid must be provided")

    collection = urllib.parse.quote_plus(self.name)
    url = f"/collections/{collection}/documents/uuid/{uuid}"

    resp = await self._aclient.delete(url)
    handle_response(resp)
async def aget_document(self, uuid: str) ‑> Document

Asynchronously gets a document.

Parameters

uuid : str
The UUID of the document to get.

Returns

Document
The retrieved document.

Raises

NotFoundError
If the document is not found.
APIError
If the API response format is unexpected.
Expand source code
async def aget_document(self, uuid: str) -> Document:
    """
    Asynchronously fetch a single document by its UUID.

    Parameters
    ----------
    uuid: str
        The UUID of the document to get.

    Returns
    -------
    Document
        The retrieved document, built from the JSON response body.

    Raises
    ------
    ValueError
        If no async client is attached, or ``uuid`` is None or blank.

    NotFoundError
        If the document is not found.

    APIError
        If the API response format is unexpected.
    """
    if not self._aclient:
        raise ValueError(
            "Can only get a document once a collection has been retrieved"
        )
    if uuid is None or not uuid.strip():
        raise ValueError("document uuid must be provided")

    collection = urllib.parse.quote_plus(self.name)
    url = f"/collections/{collection}/documents/uuid/{uuid}"

    resp = await self._aclient.get(url)
    handle_response(resp)

    fields = resp.json()
    return Document(**fields)
async def aget_documents(self, uuids: List[str]) ‑> List[Document]

Asynchronously gets a list of documents.

Parameters

uuids : List[str]
The list of document uuids to get.

Returns

List[Document]
The list of document objects.

Raises

APIError
If the API response format is unexpected.
Expand source code
async def aget_documents(self, uuids: List[str]) -> List[Document]:
    """
    Asynchronously fetch several documents in a single request.

    A warning is emitted when more than ``LARGE_BATCH_WARNING_LIMIT``
    UUIDs are requested; the request is still sent.

    Parameters
    ----------
    uuids: List[str]
        The list of document uuids to get.

    Returns
    -------
    List[Document]
        The list of document objects.

    Raises
    ------
    ValueError
        If no async client is attached, or ``uuids`` is None or empty.

    APIError
        If the API response format is unexpected.
    """
    if not self._aclient:
        raise ValueError(
            "Can only get documents once a collection has been retrieved"
        )
    # A falsy value covers both None and an empty list.
    if not uuids:
        raise ValueError("document uuids must be provided")
    if len(uuids) > LARGE_BATCH_WARNING_LIMIT:
        warnings.warn(LARGE_BATCH_WARNING, stacklevel=2)

    collection = urllib.parse.quote_plus(self.name)
    resp = await self._aclient.post(
        f"/collections/{collection}/documents/list/get",
        json={"uuids": uuids},
    )
    handle_response(resp)

    return [Document(**fields) for fields in resp.json()]
async def asearch(self, text: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, limit: Optional[int] = None, search_type: Optional[str] = None, mmr_lambda: Optional[float] = None) ‑> List[Document]

Async search over documents in a collection based on provided search criteria. One of text or metadata must be provided.

Returns an empty list if no documents are found.

Parameters

text : Optional[str], optional
The search text.
metadata : Optional[Dict[str, Any]], optional
Document metadata to filter on.
limit : Optional[int], optional
Limit the number of returned documents.
search_type : Optional[str], optional
The type of search to perform. Defaults to "similarity". Must be one of "similarity" or "mmr".
mmr_lambda : Optional[float], optional
The lambda parameter for the MMR Reranking Algorithm.

Returns

List[Document]
The list of documents that match the search criteria.

Raises

APIError
If the API response format is unexpected or there's an error from the API.
Expand source code
async def asearch(
    self,
    text: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    limit: Optional[int] = None,
    search_type: Optional[str] = None,
    mmr_lambda: Optional[float] = None,
) -> List[Document]:
    """
    Asynchronously search the documents of this collection.
    One of text or metadata must be provided.

    Delegates to ``asearch_return_query_vector`` and discards the query
    vector it returns. Returns an empty list if no documents are found.

    Parameters
    ----------
    text : Optional[str], optional
        The search text.
    metadata : Optional[Dict[str, Any]], optional
        Document metadata to filter on.
    limit : Optional[int], optional
        Limit the number of returned documents.
    search_type : Optional[str], optional
        The type of search to perform. Defaults to "similarity".
        Must be one of "similarity" or "mmr".
    mmr_lambda : Optional[float], optional
        The lambda parameter for the MMR Reranking Algorithm.

    Returns
    -------
    List[Document]
        The list of documents that match the search criteria.

    Raises
    ------
    APIError
        If the API response format is unexpected or there's an error from the API.
    """
    criteria = {
        "text": text,
        "metadata": metadata,
        "limit": limit,
        "search_type": search_type,
        "mmr_lambda": mmr_lambda,
    }

    # Only the documents are of interest here; the query vector is dropped.
    documents, _query_vector = await self.asearch_return_query_vector(**criteria)

    return documents
async def asearch_return_query_vector(self, text: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, limit: Optional[int] = None, search_type: Optional[str] = None, mmr_lambda: Optional[float] = None) ‑> Tuple[List[Document], List[float]]
Expand source code
async def asearch_return_query_vector(
    self,
    text: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    limit: Optional[int] = None,
    search_type: Optional[str] = None,
    mmr_lambda: Optional[float] = None,
) -> Tuple[List[Document], List[float]]:
    """
    Asynchronously search the collection and also return the query vector.
    One of text or metadata must be provided.

    Parameters
    ----------
    text : Optional[str], optional
        The search text.
    metadata : Optional[Dict[str, Any]], optional
        Document metadata to filter on.
    limit : Optional[int], optional
        Limit the number of returned documents. Only sent to the API when
        it is greater than zero.
    search_type : Optional[str], optional
        The type of search to perform. Defaults to "similarity".
    mmr_lambda : Optional[float], optional
        The lambda parameter for the MMR Reranking Algorithm.

    Returns
    -------
    Tuple[List[Document], List[float]]
        The matching documents (the "results" field of the response) and
        the "query_vector" field of the response. Both are empty lists
        when the API answers with a 404.

    Raises
    ------
    ValueError
        If the collection has no async client attached, if neither text nor
        metadata is given, or if text is not a string.
    APIError
        If the API response format is unexpected or there's an error from the API.
    """
    if not self._aclient:
        raise ValueError(
            "Can only search documents once a collection has been retrieved"
        )

    if text is None and metadata is None:
        raise ValueError("One of text or metadata must be provided.")

    if text is not None and not isinstance(text, str):
        raise ValueError("Text must be a string.")

    search_type_value = SearchType(search_type or "similarity")

    payload = DocumentSearchPayload(
        text=text,
        metadata=metadata,
        search_type=search_type_value,
        mmr_lambda=mmr_lambda,
    )

    url = f"/collections/{urllib.parse.quote_plus(self.name)}/search"
    params = {"limit": limit} if limit is not None and limit > 0 else {}

    response = await self._aclient.post(
        url,
        params=params,
        json=payload.model_dump(exclude_none=True, exclude_unset=True),
    )

    # If the collection is not found, return an empty list
    if response.status_code == 404:
        return [], []

    # Otherwise, handle the response for other errors
    handle_response(response)

    # Decode the body once; every response.json() call re-parses the payload.
    body = response.json()

    return (
        [Document(**document) for document in body["results"]],
        body["query_vector"],
    )
async def aupdate_document(self, uuid: str, document_id: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None) ‑> None

Asynchronously update document by UUID.

Parameters

uuid : str
The UUID of the document to update.
document_id : Optional[str]
The document_id of the document.
metadata : Optional[Dict[str, Any]]
The metadata of the document.

Returns

None
 

Raises

NotFoundError
If the document is not found.
APIError
If the API response format is unexpected.
Expand source code
async def aupdate_document(
    self,
    uuid: str,
    document_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Asynchronously update document by UUID.

    Parameters
    ----------
    uuid : str
        The UUID of the document to update. Must not be None, empty or
        whitespace-only.
    document_id : Optional[str]
        The document_id of the document.
    metadata : Optional[Dict[str, Any]]
        The metadata of the document.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the collection has no async client attached, if the uuid is
        missing or blank, or if neither document_id nor metadata is given.

    NotFoundError
        If the document is not found.

    APIError
        If the API response format is unexpected.
    """
    if not self._aclient:
        raise ValueError(
            "Can only update documents once a collection has been retrieved or"
            " created"
        )

    # A blank uuid would produce the malformed path ".../documents/uuid/",
    # so reject it up front, as delete_document and get_document do.
    # str() keeps non-string identifiers (e.g. uuid.UUID) working as before.
    if uuid is None or not str(uuid).strip():
        raise ValueError("document uuid must be provided")

    if document_id is None and metadata is None:
        raise ValueError("document_id or metadata must be provided")

    payload = filter_dict({"document_id": document_id, "metadata": metadata})

    response = await self._aclient.patch(
        f"/collections/{urllib.parse.quote_plus(self.name)}/documents/uuid/{uuid}",
        json=payload,
    )

    handle_response(response)
def delete_document(self, uuid: str) ‑> None

Delete document.

Parameters

uuid : str
The uuid of the document to be deleted.

Returns

None
 

Raises

NotFoundError
If the document is not found.
APIError
If the API response format is unexpected.
Expand source code
def delete_document(self, uuid: str) -> None:
    """
    Remove a single document from this collection.

    Parameters
    ----------
    uuid: str
        The uuid of the document to be deleted. Must not be None, empty
        or whitespace-only.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the collection has no client attached or the uuid is missing.

    NotFoundError
        If the document is not found.

    APIError
        If the API response format is unexpected.
    """
    if not self._client:
        raise ValueError(
            "Can only delete a document once a collection has been retrieved"
        )

    uuid_missing = uuid is None or not uuid.strip()
    if uuid_missing:
        raise ValueError("document uuid must be provided")

    collection_path = urllib.parse.quote_plus(self.name)
    endpoint = f"/collections/{collection_path}/documents/uuid/{uuid}"

    handle_response(self._client.delete(endpoint))
def get_document(self, uuid: str) ‑> Document

Gets a document.

Parameters

uuid : str
The UUID of the document to get.

Returns

Document
The retrieved document.

Raises

NotFoundError
If the document is not found.
APIError
If the API response format is unexpected.
Expand source code
def get_document(self, uuid: str) -> Document:
    """
    Fetch a single document from this collection.

    Parameters
    ----------
    uuid: str
        The uuid of the document to get. Must not be None, empty or
        whitespace-only.

    Returns
    -------
    Document
        The retrieved document, built from the JSON body of the response.

    Raises
    ------
    ValueError
        If the collection has no client attached or the uuid is missing.

    NotFoundError
        If the document is not found.

    APIError
        If the API response format is unexpected.
    """
    if not self._client:
        raise ValueError(
            "Can only get a document once a collection has been retrieved"
        )

    uuid_missing = uuid is None or not uuid.strip()
    if uuid_missing:
        raise ValueError("document uuid must be provided")

    collection_path = urllib.parse.quote_plus(self.name)
    response = self._client.get(
        f"/collections/{collection_path}/documents/uuid/{uuid}"
    )

    handle_response(response)

    fields = response.json()
    return Document(**fields)
def get_documents(self, uuids: List[str]) ‑> List[Document]

Gets a list of documents.

Parameters

uuids : List[str]
The list of document uuids to get.

Returns

List[Document]
The list of document objects.

Raises

APIError
If the API response format is unexpected.
Expand source code
def get_documents(self, uuids: List[str]) -> List[Document]:
    """
    Fetch several documents from this collection by UUID.

    Parameters
    ----------
    uuids: List[str]
        The UUIDs of the documents to fetch. Must not be empty.

    Returns
    -------
    List[Document]
        One Document per entry in the JSON body returned by the API.

    Raises
    ------
    ValueError
        If the collection has no client attached, or no uuids are given.
    APIError
        If the API response format is unexpected.
    """
    if not self._client:
        raise ValueError(
            "Can only get documents once a collection has been retrieved"
        )

    if not uuids:
        raise ValueError("document uuids must be provided")

    # Oversized batches are still sent; the caller only receives a warning.
    if len(uuids) > LARGE_BATCH_WARNING_LIMIT:
        warnings.warn(LARGE_BATCH_WARNING, stacklevel=2)

    collection_path = urllib.parse.quote_plus(self.name)
    response = self._client.post(
        f"/collections/{collection_path}/documents/list/get",
        json={"uuids": uuids},
    )

    handle_response(response)

    return [Document(**fields) for fields in response.json()]
def model_post_init(self: BaseModel, __context: Any) ‑> None

This function is meant to behave like a BaseModel method to initialise private attributes.

It takes context as an argument since that's what pydantic-core passes when calling it.

Args

self
The BaseModel instance.
__context
The context.
Expand source code
def init_private_attributes(self: BaseModel, __context: Any) -> None:
    """Populate ``__pydantic_private__`` with private-attribute defaults.

    Does nothing when the instance already has a non-None
    ``__pydantic_private__``. Otherwise every entry of
    ``__private_attributes__`` whose default is not ``PydanticUndefined``
    is collected into a dict, which is stored via ``object_setattr``.

    Args:
        self: The BaseModel instance.
        __context: The context. Accepted but not used by this function.
    """
    if getattr(self, '__pydantic_private__', None) is not None:
        return

    # Lazily pair each private attribute name with its default value.
    defaults = (
        (attr_name, attr.get_default())
        for attr_name, attr in self.__private_attributes__.items()
    )
    private_values = {
        attr_name: value
        for attr_name, value in defaults
        if value is not PydanticUndefined
    }
    object_setattr(self, '__pydantic_private__', private_values)
def search(self, text: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, limit: Optional[int] = None, search_type: Optional[str] = None, mmr_lambda: Optional[float] = None) ‑> List[Document]

Searches over documents in a collection based on provided search criteria. One of text or metadata must be provided.

Returns an empty list if no documents are found.

Parameters

text : Optional[str], optional
The search text.
metadata : Optional[Dict[str, Any]], optional
Document metadata to filter on.
limit : Optional[int], optional
Limit the number of returned documents.
search_type : Optional[str], optional
The type of search to perform. Defaults to "similarity". Must be one of "similarity" or "mmr".
mmr_lambda : Optional[float], optional
The lambda parameter for the MMR Reranking Algorithm.

Returns

List[Document]
The list of documents that match the search criteria.

Raises

APIError
If the API response format is unexpected or there's an error from the API.
Expand source code
def search(
    self,
    text: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    limit: Optional[int] = None,
    search_type: Optional[str] = None,
    mmr_lambda: Optional[float] = None,
) -> List[Document]:
    """
    Search the documents of this collection.
    One of text or metadata must be provided.

    Delegates to ``search_return_query_vector`` and discards the query
    vector it returns. Returns an empty list if no documents are found.

    Parameters
    ----------
    text : Optional[str], optional
        The search text.
    metadata : Optional[Dict[str, Any]], optional
        Document metadata to filter on.
    limit : Optional[int], optional
        Limit the number of returned documents.
    search_type : Optional[str], optional
        The type of search to perform. Defaults to "similarity".
        Must be one of "similarity" or "mmr".
    mmr_lambda : Optional[float], optional
        The lambda parameter for the MMR Reranking Algorithm.

    Returns
    -------
    List[Document]
        The list of documents that match the search criteria.

    Raises
    ------
    APIError
        If the API response format is unexpected or there's an error from the API.
    """
    criteria = {
        "text": text,
        "metadata": metadata,
        "limit": limit,
        "search_type": search_type,
        "mmr_lambda": mmr_lambda,
    }

    # Only the documents are of interest here; the query vector is dropped.
    documents, _query_vector = self.search_return_query_vector(**criteria)

    return documents
def search_return_query_vector(self, text: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, limit: Optional[int] = None, search_type: Optional[str] = None, mmr_lambda: Optional[float] = None) ‑> Tuple[List[Document], List[float]]
Expand source code
def search_return_query_vector(
    self,
    text: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    limit: Optional[int] = None,
    search_type: Optional[str] = None,
    mmr_lambda: Optional[float] = None,
) -> Tuple[List[Document], List[float]]:
    """
    Search the collection and also return the query vector.
    One of text or metadata must be provided.

    Parameters
    ----------
    text : Optional[str], optional
        The search text.
    metadata : Optional[Dict[str, Any]], optional
        Document metadata to filter on.
    limit : Optional[int], optional
        Limit the number of returned documents. Only sent to the API when
        it is greater than zero.
    search_type : Optional[str], optional
        The type of search to perform. Defaults to "similarity".
    mmr_lambda : Optional[float], optional
        The lambda parameter for the MMR Reranking Algorithm.

    Returns
    -------
    Tuple[List[Document], List[float]]
        The matching documents (the "results" field of the response) and
        the "query_vector" field of the response. Both are empty lists
        when the API answers with a 404.

    Raises
    ------
    ValueError
        If the collection has no client attached, if neither text nor
        metadata is given, or if text is not a string.
    APIError
        If the API response format is unexpected or there's an error from the API.
    """
    if not self._client:
        raise ValueError(
            "Can only search documents once a collection has been retrieved"
        )

    if text is None and metadata is None:
        raise ValueError("One of text or metadata must be provided.")

    if text is not None and not isinstance(text, str):
        raise ValueError("Text must be a string.")

    search_type_value = SearchType(search_type or "similarity")

    payload = DocumentSearchPayload(
        text=text,
        metadata=metadata,
        search_type=search_type_value,
        mmr_lambda=mmr_lambda,
    )

    url = f"/collections/{urllib.parse.quote_plus(self.name)}/search"
    params = {"limit": limit} if limit is not None and limit > 0 else {}

    response = self._client.post(
        url,
        params=params,
        json=payload.model_dump(exclude_none=True, exclude_unset=True),
    )

    # If the collection is not found, return an empty list
    if response.status_code == 404:
        return [], []

    # Otherwise, handle the response for other errors
    handle_response(response)

    # Decode the body once; every response.json() call re-parses the payload.
    body = response.json()

    return (
        [Document(**document) for document in body["results"]],
        body["query_vector"],
    )
def update_document(self, uuid: str, document_id: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None) ‑> None

Update document by UUID.

Parameters

uuid : str
The UUID of the document to update.
document_id : Optional[str]
The document_id of the document.
metadata : Optional[Dict[str, Any]]
The metadata of the document.

Returns

None
 

Raises

NotFoundError
If the document is not found.
APIError
If the API response format is unexpected.
Expand source code
def update_document(
    self,
    uuid: str,
    document_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Update document by UUID.

    Parameters
    ----------
    uuid : str
        The UUID of the document to update. Must not be None, empty or
        whitespace-only.
    document_id : Optional[str]
        The document_id of the document.
    metadata : Optional[Dict[str, Any]]
        The metadata of the document.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the collection has no client attached, if the uuid is missing
        or blank, or if neither document_id nor metadata is given.

    NotFoundError
        If the document is not found.

    APIError
        If the API response format is unexpected.
    """
    if not self._client:
        raise ValueError(
            "Can only update documents once a collection has been retrieved or"
            " created"
        )

    # A blank uuid would produce the malformed path ".../documents/uuid/",
    # so reject it up front, as delete_document and get_document do.
    # str() keeps non-string identifiers (e.g. uuid.UUID) working as before.
    if uuid is None or not str(uuid).strip():
        raise ValueError("document uuid must be provided")

    if document_id is None and metadata is None:
        raise ValueError("document_id or metadata must be provided")

    payload = filter_dict({"document_id": document_id, "metadata": metadata})

    response = self._client.patch(
        f"/collections/{urllib.parse.quote_plus(self.name)}/documents/uuid/{uuid}",
        json=payload,
    )

    handle_response(response)

Inherited members