Module zep_python.document
Expand source code
from zep_python.document.collections import DocumentCollection
from zep_python.document.models import Document
__all__ = [
"Document",
"DocumentCollection",
]
Sub-modules
zep_python.document.client
zep_python.document.collections
zep_python.document.models
Classes
class Document (**data: Any)
-
Represents a document base.
Attributes
uuid
:Optional[str]
- The unique identifier of the document.
created_at
:Optional[datetime]
- The timestamp of when the document was created.
updated_at
:Optional[datetime]
- The timestamp of when the document was last updated.
document_id
:Optional[str]
- The unique identifier of the document (name or some id).
content
:str
- The content of the document.
metadata
:Optional[Dict[str, Any]]
- Any additional metadata associated with the document.
is_embedded
:Optional[bool]
- Whether the document has an embedding.
embedding
:Optional[List[float]]
- The embedding of the document.
score
:Optional[float]
- The normed score of the search result. Available only when the document is returned as part of a query result.
Create a new model by parsing and validating input data from keyword arguments.
Raises [`ValidationError`][pydantic_core.ValidationError] if the input data cannot be validated to form a valid model.
`self` is explicitly positional-only to allow `self` as a field name.
Expand source code
class Document(BaseModel):
    """
    A single document stored in (or destined for) a collection.

    Attributes
    ----------
    uuid : Optional[str]
        Unique identifier of the document.
    created_at : Optional[datetime]
        When the document was created.
    updated_at : Optional[datetime]
        When the document was last updated.
    document_id : Optional[str]
        Caller-supplied identifier (a name or some id); at most 100
        characters.
    content : str
        The document text; required and must be at least one character long.
    metadata : Optional[Dict[str, Any]]
        Arbitrary metadata attached to the document. Defaults to a fresh
        empty dict per instance.
    is_embedded : Optional[bool]
        Whether the document has an embedding.
    embedding : Optional[List[float]]
        The embedding vector of the document.
    score : Optional[float]
        The normed score of the search result. Available only when the
        document is returned as part of a query result.
    """

    # Field declaration order is kept as-is: it determines the key order of
    # the dictionary produced by ``model_dump()``.
    uuid: Optional[str] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    document_id: Optional[str] = Field(default=None, max_length=100)
    content: str = Field(..., min_length=1)
    metadata: Optional[Dict[str, Any]] = Field(default_factory=dict)
    is_embedded: Optional[bool] = None
    embedding: Optional[List[float]] = None
    score: Optional[float] = None

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialise the document into a plain dictionary.

        Returns
        -------
        Dict[str, Any]
            The document's attributes as produced by ``model_dump()``.
        """
        as_dict = self.model_dump()
        return as_dict
Ancestors
- pydantic.main.BaseModel
Class variables
var content : str
var created_at : Optional[datetime.datetime]
var document_id : Optional[str]
var embedding : Optional[List[float]]
var is_embedded : Optional[bool]
var metadata : Optional[Dict[str, Any]]
var model_computed_fields
var model_config
var model_fields
var score : Optional[float]
var updated_at : Optional[datetime.datetime]
var uuid : Optional[str]
Methods
def to_dict(self) ‑> Dict[str, Any]
-
Returns a dictionary representation of the document.
Returns
Dict[str, Any]
- A dictionary containing the attributes of the document.
Expand source code
def to_dict(self) -> Dict[str, Any]:
    """
    Serialise the document into a plain dictionary.

    Returns
    -------
    Dict[str, Any]
        The document's attributes as produced by ``model_dump()``.
    """
    as_dict = self.model_dump()
    return as_dict
class DocumentCollection (aclient: Optional[httpx.AsyncClient] = None, client: Optional[httpx.Client] = None, **kwargs: Any)
-
Represents a collection of documents.
Attributes
uuid
:str
- The unique identifier of the collection.
created_at
:Optional[datetime]
- The timestamp of when the collection was created.
updated_at
:Optional[datetime]
- The timestamp of when the collection was last updated.
name
:str
- The unique name of the collection.
description
:Optional[str]
- The description of the collection.
metadata
:Optional[Dict[str, Any]]
- Any additional metadata associated with the collection.
embedding_dimensions
:int
- The dimensions of the embedding model.
is_auto_embedded
:bool
- Flag to indicate whether the documents in the collection should be automatically embedded by Zep. (Default: True)
is_indexed
:bool
- Flag indicating whether an index has been created for this collection.
Create a new model by parsing and validating input data from keyword arguments.
Raises [`ValidationError`][pydantic_core.ValidationError] if the input data cannot be validated to form a valid model.
`self` is explicitly positional-only to allow `self` as a field name.
Expand source code
class DocumentCollection(DocumentCollectionModel):
    # The class docstring is inherited from the underlying model so that the
    # attribute documentation lives in a single place.
    __doc__ = DocumentCollectionModel.__doc__ or ""

    _client: Optional[httpx.Client] = PrivateAttr(default=None)
    _aclient: Optional[httpx.AsyncClient] = PrivateAttr(default=None)

    def __init__(
        self,
        aclient: Optional[httpx.AsyncClient] = None,
        client: Optional[httpx.Client] = None,
        **kwargs: Any,
    ) -> None:
        """
        Create a collection, optionally bound to HTTP clients.

        Parameters
        ----------
        aclient : Optional[httpx.AsyncClient], optional
            Client used by the asynchronous (``a``-prefixed) methods.
        client : Optional[httpx.Client], optional
            Client used by the synchronous methods.
        **kwargs : Any
            Model fields, forwarded to the parent model's constructor.
        """
        super().__init__(**kwargs)
        self._aclient = aclient
        self._client = client

    @property
    def status(self) -> str:
        """
        Get the status of the collection.

        Returns
        -------
        str
            The status of the collection.
            `ready`: All documents have been embedded and the collection is
            ready for search.
            `pending`: The collection is still processing.
        """
        if self.document_count and (
            self.document_embedded_count == self.document_count
        ):
            return "ready"
        else:
            return "pending"

    async def aadd_documents(
        self,
        documents: List[Document],
        batch_size: int = DEFAULT_BATCH_SIZE,
    ) -> List[str]:
        """
        Asynchronously create documents.

        Parameters
        ----------
        documents : List[Document]
            A list of Document objects representing the documents to create.
        batch_size : int, optional
            The number of documents to upload in each batch. Defaults to
            ``DEFAULT_BATCH_SIZE``.

        Returns
        -------
        List[str]
            The UUIDs of the created documents.

        Raises
        ------
        ValueError
            If no async client is attached or ``documents`` is None.
        APIError
            If the API response format is unexpected.
        """
        if not self._aclient:
            raise ValueError(
                "Can only add documents once a collection has been created"
            )

        if documents is None:
            raise ValueError("document list must be provided")

        uuids: List[str] = []
        for batch in generate_batches(documents, batch_size):
            response = await self._aclient.post(
                f"/collections/{urllib.parse.quote_plus(self.name)}/documents",
                json=batch,
            )

            handle_response(response)

            uuids.extend(response.json())

        return uuids

    def add_documents(
        self,
        documents: List[Document],
        batch_size: int = DEFAULT_BATCH_SIZE,
    ) -> List[str]:
        """
        Create documents.

        Parameters
        ----------
        documents : List[Document]
            A list of Document objects representing the documents to create.
        batch_size : int, optional
            The number of documents to upload in each batch. Defaults to
            ``DEFAULT_BATCH_SIZE``.

        Returns
        -------
        List[str]
            The UUIDs of the created documents.

        Raises
        ------
        ValueError
            If no client is attached or ``documents`` is None.
        APIError
            If the API response format is unexpected.
        """
        if not self._client:
            raise ValueError(
                "Can only add documents once a collection has been created"
            )

        if documents is None:
            raise ValueError("document list must be provided")

        uuids: List[str] = []
        for batch in generate_batches(documents, batch_size):
            response = self._client.post(
                f"/collections/{urllib.parse.quote_plus(self.name)}/documents",
                json=batch,
            )

            handle_response(response)

            uuids.extend(response.json())

        return uuids

    async def aupdate_document(
        self,
        uuid: str,
        document_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> None:
        """
        Asynchronously update document by UUID.

        Parameters
        ----------
        uuid : str
            The UUID of the document to update.
        document_id : Optional[str]
            The document_id of the document.
        metadata : Optional[Dict[str, Any]]
            The metadata of the document.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If no async client is attached, ``uuid`` is None, or neither
            ``document_id`` nor ``metadata`` is given.
        NotFoundError
            If the document is not found.
        APIError
            If the API response format is unexpected.
        """
        if not self._aclient:
            raise ValueError(
                "Can only update documents once a collection has been retrieved or"
                " created"
            )

        if uuid is None:
            raise ValueError("document uuid must be provided")

        if document_id is None and metadata is None:
            raise ValueError("document_id or metadata must be provided")

        payload = filter_dict({"document_id": document_id, "metadata": metadata})

        response = await self._aclient.patch(
            f"/collections/{urllib.parse.quote_plus(self.name)}/documents/uuid/{uuid}",
            json=payload,
        )

        handle_response(response)

    def update_document(
        self,
        uuid: str,
        document_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> None:
        """
        Update document by UUID.

        Parameters
        ----------
        uuid : str
            The UUID of the document to update.
        document_id : Optional[str]
            The document_id of the document.
        metadata : Optional[Dict[str, Any]]
            The metadata of the document.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If no client is attached, ``uuid`` is None, or neither
            ``document_id`` nor ``metadata`` is given.
        NotFoundError
            If the document is not found.
        APIError
            If the API response format is unexpected.
        """
        if not self._client:
            raise ValueError(
                "Can only update documents once a collection has been retrieved or"
                " created"
            )

        if uuid is None:
            raise ValueError("document uuid must be provided")

        if document_id is None and metadata is None:
            raise ValueError("document_id or metadata must be provided")

        payload = filter_dict({"document_id": document_id, "metadata": metadata})

        response = self._client.patch(
            f"/collections/{urllib.parse.quote_plus(self.name)}/documents/uuid/{uuid}",
            json=payload,
        )

        handle_response(response)

    async def adelete_document(self, uuid: str) -> None:
        """
        Asynchronously delete document.

        Parameters
        ----------
        uuid: str
            The uuid of the document to be deleted.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If no async client is attached or ``uuid`` is None or blank.
        NotFoundError
            If the document is not found.
        APIError
            If the API response format is unexpected.
        """
        if not self._aclient:
            raise ValueError(
                "Can only delete a document once a collection has been retrieved"
            )

        if uuid is None or uuid.strip() == "":
            raise ValueError("document uuid must be provided")

        response = await self._aclient.delete(
            f"/collections/{urllib.parse.quote_plus(self.name)}/documents/uuid/{uuid}",
        )

        handle_response(response)

    def delete_document(self, uuid: str) -> None:
        """
        Delete document.

        Parameters
        ----------
        uuid: str
            The uuid of the document to be deleted.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If no client is attached or ``uuid`` is None or blank.
        NotFoundError
            If the document is not found.
        APIError
            If the API response format is unexpected.
        """
        if not self._client:
            raise ValueError(
                "Can only delete a document once a collection has been retrieved"
            )

        if uuid is None or uuid.strip() == "":
            raise ValueError("document uuid must be provided")

        response = self._client.delete(
            f"/collections/{urllib.parse.quote_plus(self.name)}/documents/uuid/{uuid}",
        )

        handle_response(response)

    async def aget_document(self, uuid: str) -> Document:
        """
        Asynchronously gets a document.

        Parameters
        ----------
        uuid: str
            The uuid of the document to get.

        Returns
        -------
        Document
            The retrieved document.

        Raises
        ------
        ValueError
            If no async client is attached or ``uuid`` is None or blank.
        NotFoundError
            If the document is not found.
        APIError
            If the API response format is unexpected.
        """
        if not self._aclient:
            raise ValueError(
                "Can only get a document once a collection has been retrieved"
            )

        if uuid is None or uuid.strip() == "":
            raise ValueError("document uuid must be provided")

        response = await self._aclient.get(
            f"/collections/{urllib.parse.quote_plus(self.name)}/documents/uuid/{uuid}",
        )

        handle_response(response)

        return Document(**response.json())

    def get_document(self, uuid: str) -> Document:
        """
        Gets a document.

        Parameters
        ----------
        uuid: str
            The uuid of the document to get.

        Returns
        -------
        Document
            The retrieved document.

        Raises
        ------
        ValueError
            If no client is attached or ``uuid`` is None or blank.
        NotFoundError
            If the document is not found.
        APIError
            If the API response format is unexpected.
        """
        if not self._client:
            raise ValueError(
                "Can only get a document once a collection has been retrieved"
            )

        if uuid is None or uuid.strip() == "":
            raise ValueError("document uuid must be provided")

        response = self._client.get(
            f"/collections/{urllib.parse.quote_plus(self.name)}/documents/uuid/{uuid}",
        )

        handle_response(response)

        return Document(**response.json())

    async def aget_documents(self, uuids: List[str]) -> List[Document]:
        """
        Asynchronously gets a list of documents.

        Parameters
        ----------
        uuids: List[str]
            The list of document uuids to get.

        Returns
        -------
        List[Document]
            The list of document objects.

        Raises
        ------
        ValueError
            If no async client is attached or ``uuids`` is empty.
        APIError
            If the API response format is unexpected.
        """
        if not self._aclient:
            raise ValueError(
                "Can only get documents once a collection has been retrieved"
            )

        # ``not uuids`` already covers both None and an empty list.
        if not uuids:
            raise ValueError("document uuids must be provided")

        if len(uuids) > LARGE_BATCH_WARNING_LIMIT:
            warnings.warn(LARGE_BATCH_WARNING, stacklevel=2)

        response = await self._aclient.post(
            f"/collections/{urllib.parse.quote_plus(self.name)}/documents/list/get",
            json={"uuids": uuids},
        )

        handle_response(response)

        return [Document(**document) for document in response.json()]

    def get_documents(self, uuids: List[str]) -> List[Document]:
        """
        Gets a list of documents.

        Parameters
        ----------
        uuids: List[str]
            The list of document uuids to get.

        Returns
        -------
        List[Document]
            The list of document objects.

        Raises
        ------
        ValueError
            If no client is attached or ``uuids`` is empty.
        APIError
            If the API response format is unexpected.
        """
        if not self._client:
            raise ValueError(
                "Can only get documents once a collection has been retrieved"
            )

        # ``not uuids`` already covers both None and an empty list.
        if not uuids:
            raise ValueError("document uuids must be provided")

        if len(uuids) > LARGE_BATCH_WARNING_LIMIT:
            warnings.warn(LARGE_BATCH_WARNING, stacklevel=2)

        response = self._client.post(
            f"/collections/{urllib.parse.quote_plus(self.name)}/documents/list/get",
            json={"uuids": uuids},
        )

        handle_response(response)

        return [Document(**document) for document in response.json()]

    async def asearch_return_query_vector(
        self,
        text: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        limit: Optional[int] = None,
        search_type: Optional[str] = None,
        mmr_lambda: Optional[float] = None,
    ) -> Tuple[List[Document], List[float]]:
        """
        Async search over documents, also returning the query vector.

        One of text or metadata must be provided.

        Parameters
        ----------
        text : Optional[str], optional
            The search text.
        metadata : Optional[Dict[str, Any]], optional
            Document metadata to filter on.
        limit : Optional[int], optional
            Limit the number of returned documents. Only sent to the API
            when greater than zero.
        search_type : Optional[str], optional
            The type of search to perform. Defaults to "similarity".
        mmr_lambda : Optional[float], optional
            The lambda parameter for the MMR Reranking Algorithm.

        Returns
        -------
        Tuple[List[Document], List[float]]
            The documents from the response's ``results`` field and the
            response's ``query_vector`` field. Two empty lists are returned
            when the API answers with status 404.

        Raises
        ------
        ValueError
            If no async client is attached, neither ``text`` nor
            ``metadata`` is given, or ``text`` is not a string.
        """
        if not self._aclient:
            raise ValueError(
                "Can only search documents once a collection has been retrieved"
            )

        if text is None and metadata is None:
            raise ValueError("One of text or metadata must be provided.")

        if text is not None and not isinstance(text, str):
            raise ValueError("Text must be a string.")

        search_type_value = SearchType(search_type or "similarity")

        payload = DocumentSearchPayload(
            text=text,
            metadata=metadata,
            search_type=search_type_value,
            mmr_lambda=mmr_lambda,
        )

        url = f"/collections/{urllib.parse.quote_plus(self.name)}/search"
        params = {"limit": limit} if limit is not None and limit > 0 else {}
        response = await self._aclient.post(
            url,
            params=params,
            json=payload.model_dump(exclude_none=True, exclude_unset=True),
        )

        # If the collection is not found, return an empty list
        if response.status_code == 404:
            return [], []

        # Otherwise, handle the response for other errors
        handle_response(response)

        # Decode the body once instead of once per field.
        body = response.json()
        return (
            [Document(**document) for document in body["results"]],
            body["query_vector"],
        )

    async def asearch(
        self,
        text: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        limit: Optional[int] = None,
        search_type: Optional[str] = None,
        mmr_lambda: Optional[float] = None,
    ) -> List[Document]:
        """
        Async search over documents in a collection based on provided search
        criteria. One of text or metadata must be provided.

        Returns an empty list if no documents are found.

        Parameters
        ----------
        text : Optional[str], optional
            The search text.
        metadata : Optional[Dict[str, Any]], optional
            Document metadata to filter on.
        limit : Optional[int], optional
            Limit the number of returned documents.
        search_type : Optional[str], optional
            The type of search to perform. Defaults to "similarity".
            Must be one of "similarity" or "mmr".
        mmr_lambda : Optional[float], optional
            The lambda parameter for the MMR Reranking Algorithm.

        Returns
        -------
        List[Document]
            The list of documents that match the search criteria.

        Raises
        ------
        APIError
            If the API response format is unexpected or there's an error
            from the API.
        """
        results, _ = await self.asearch_return_query_vector(
            text=text,
            metadata=metadata,
            limit=limit,
            search_type=search_type,
            mmr_lambda=mmr_lambda,
        )

        return results

    def search_return_query_vector(
        self,
        text: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        limit: Optional[int] = None,
        search_type: Optional[str] = None,
        mmr_lambda: Optional[float] = None,
    ) -> Tuple[List[Document], List[float]]:
        """
        Search over documents, also returning the query vector.

        One of text or metadata must be provided.

        Parameters
        ----------
        text : Optional[str], optional
            The search text.
        metadata : Optional[Dict[str, Any]], optional
            Document metadata to filter on.
        limit : Optional[int], optional
            Limit the number of returned documents. Only sent to the API
            when greater than zero.
        search_type : Optional[str], optional
            The type of search to perform. Defaults to "similarity".
        mmr_lambda : Optional[float], optional
            The lambda parameter for the MMR Reranking Algorithm.

        Returns
        -------
        Tuple[List[Document], List[float]]
            The documents from the response's ``results`` field and the
            response's ``query_vector`` field. Two empty lists are returned
            when the API answers with status 404.

        Raises
        ------
        ValueError
            If no client is attached, neither ``text`` nor ``metadata`` is
            given, or ``text`` is not a string.
        """
        if not self._client:
            raise ValueError(
                "Can only search documents once a collection has been retrieved"
            )

        # Previously written as ``text is None is None``, a chained
        # comparison that only worked by accident; now matches the async twin.
        if text is None and metadata is None:
            raise ValueError("One of text or metadata must be provided.")

        if text is not None and not isinstance(text, str):
            raise ValueError("Text must be a string.")

        search_type_value = SearchType(search_type or "similarity")

        payload = DocumentSearchPayload(
            text=text,
            metadata=metadata,
            search_type=search_type_value,
            mmr_lambda=mmr_lambda,
        )

        url = f"/collections/{urllib.parse.quote_plus(self.name)}/search"
        params = {"limit": limit} if limit is not None and limit > 0 else {}
        response = self._client.post(
            url,
            params=params,
            json=payload.model_dump(exclude_none=True, exclude_unset=True),
        )

        # If the collection is not found, return an empty list
        if response.status_code == 404:
            return [], []

        # Otherwise, handle the response for other errors
        handle_response(response)

        # Decode the body once instead of once per field.
        body = response.json()
        return (
            [Document(**document) for document in body["results"]],
            body["query_vector"],
        )

    def search(
        self,
        text: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        limit: Optional[int] = None,
        search_type: Optional[str] = None,
        mmr_lambda: Optional[float] = None,
    ) -> List[Document]:
        """
        Searches over documents in a collection based on provided search
        criteria. One of text or metadata must be provided.

        Returns an empty list if no documents are found.

        Parameters
        ----------
        text : Optional[str], optional
            The search text.
        metadata : Optional[Dict[str, Any]], optional
            Document metadata to filter on.
        limit : Optional[int], optional
            Limit the number of returned documents.
        search_type : Optional[str], optional
            The type of search to perform. Defaults to "similarity".
            Must be one of "similarity" or "mmr".
        mmr_lambda : Optional[float], optional
            The lambda parameter for the MMR Reranking Algorithm.

        Returns
        -------
        List[Document]
            The list of documents that match the search criteria.

        Raises
        ------
        APIError
            If the API response format is unexpected or there's an error
            from the API.
        """
        results, _ = self.search_return_query_vector(
            text=text,
            metadata=metadata,
            limit=limit,
            search_type=search_type,
            mmr_lambda=mmr_lambda,
        )

        return results
Ancestors
- DocumentCollectionModel
- pydantic.main.BaseModel
Class variables
var model_computed_fields
var model_config
var model_fields
Instance variables
var status : str
-
Get the status of the collection.
Returns
str
-
The status of the collection.
ready
: All documents have been embedded and the collection is ready for search.
pending
: The collection is still processing.
Expand source code
@property
def status(self) -> str:
    """
    Report whether the collection is ready to be searched.

    Returns
    -------
    str
        `ready` when the collection has a non-zero document count and the
        embedded-document count equals it; `pending` otherwise.
    """
    total = self.document_count
    if not total:
        # No documents (or an unknown count) is never reported as ready.
        return "pending"
    if self.document_embedded_count == self.document_count:
        return "ready"
    return "pending"
Methods
async def aadd_documents(self, documents: List[Document], batch_size: int = 1000) ‑> List[str]
-
Asynchronously create documents.
Parameters
documents
:List[Document]
- A list of Document objects representing the documents to create.
batch_size
:int, optional
- The number of documents to upload in each batch. Defaults to 1000.
Returns
List[str]
- The UUIDs of the created documents.
Raises
APIError
- If the API response format is unexpected.
Expand source code
async def aadd_documents(
    self,
    documents: List[Document],
    batch_size: int = DEFAULT_BATCH_SIZE,
) -> List[str]:
    """
    Asynchronously upload documents to this collection in batches.

    Parameters
    ----------
    documents : List[Document]
        The Document objects to create.
    batch_size : int, optional
        The number of documents to upload in each batch. Defaults to
        ``DEFAULT_BATCH_SIZE``.

    Returns
    -------
    List[str]
        The UUIDs of the created documents, accumulated batch by batch.

    Raises
    ------
    ValueError
        If no async client is attached or ``documents`` is None.
    APIError
        If the API response format is unexpected.
    """
    aclient = self._aclient
    if not aclient:
        raise ValueError(
            "Can only add documents once a collection has been created"
        )
    if documents is None:
        raise ValueError("document list must be provided")

    created: List[str] = []
    for chunk in generate_batches(documents, batch_size):
        reply = await aclient.post(
            f"/collections/{urllib.parse.quote_plus(self.name)}/documents",
            json=chunk,
        )
        handle_response(reply)
        created += reply.json()

    return created
def add_documents(self, documents: List[Document], batch_size: int = 1000) ‑> List[str]
-
Create documents.
Parameters
documents
:List[Document]
- A list of Document objects representing the documents to create.
Returns
List[str]
- The UUIDs of the created documents.
batch_size
:int, optional
- The number of documents to upload in each batch. Defaults to 1000.
Raises
APIError
- If the API response format is unexpected.
Expand source code
def add_documents(
    self,
    documents: List[Document],
    batch_size: int = DEFAULT_BATCH_SIZE,
) -> List[str]:
    """
    Upload documents to this collection in batches.

    Parameters
    ----------
    documents : List[Document]
        The Document objects to create.
    batch_size : int, optional
        The number of documents to upload in each batch. Defaults to
        ``DEFAULT_BATCH_SIZE``.

    Returns
    -------
    List[str]
        The UUIDs of the created documents, accumulated batch by batch.

    Raises
    ------
    ValueError
        If no client is attached or ``documents`` is None.
    APIError
        If the API response format is unexpected.
    """
    client = self._client
    if not client:
        raise ValueError(
            "Can only add documents once a collection has been created"
        )
    if documents is None:
        raise ValueError("document list must be provided")

    created: List[str] = []
    for chunk in generate_batches(documents, batch_size):
        reply = client.post(
            f"/collections/{urllib.parse.quote_plus(self.name)}/documents",
            json=chunk,
        )
        handle_response(reply)
        created += reply.json()

    return created
async def adelete_document(self, uuid: str) ‑> None
-
Asynchronously delete document.
Parameters
uuid
:str
- The uuid of the document to be deleted.
Returns
None
Raises
NotFoundError
- If the document is not found.
APIError
- If the API response format is unexpected.
Expand source code
async def adelete_document(self, uuid: str) -> None:
    """
    Asynchronously delete a single document from this collection.

    Parameters
    ----------
    uuid: str
        The uuid of the document to be deleted.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If no async client is attached or ``uuid`` is None or blank.
    NotFoundError
        If the document is not found.
    APIError
        If the API response format is unexpected.
    """
    aclient = self._aclient
    if not aclient:
        raise ValueError(
            "Can only delete a document once a collection has been retrieved"
        )

    uuid_missing = uuid is None or uuid.strip() == ""
    if uuid_missing:
        raise ValueError("document uuid must be provided")

    url = f"/collections/{urllib.parse.quote_plus(self.name)}/documents/uuid/{uuid}"
    reply = await aclient.delete(url)
    handle_response(reply)
async def aget_document(self, uuid: str) ‑> Document
-
Asynchronously gets a document.
Parameters
uuid
:str
- The uuid of the document to get.
Returns
Document
- The retrieved document.
Raises
NotFoundError
- If the document is not found.
APIError
- If the API response format is unexpected.
Expand source code
async def aget_document(self, uuid: str) -> Document:
    """
    Asynchronously fetch a single document from this collection.

    Parameters
    ----------
    uuid: str
        The uuid of the document to get.

    Returns
    -------
    Document
        The retrieved document, built from the JSON response body.

    Raises
    ------
    ValueError
        If no async client is attached or ``uuid`` is None or blank.
    NotFoundError
        If the document is not found.
    APIError
        If the API response format is unexpected.
    """
    aclient = self._aclient
    if not aclient:
        raise ValueError(
            "Can only get a document once a collection has been retrieved"
        )

    uuid_missing = uuid is None or uuid.strip() == ""
    if uuid_missing:
        raise ValueError("document uuid must be provided")

    url = f"/collections/{urllib.parse.quote_plus(self.name)}/documents/uuid/{uuid}"
    reply = await aclient.get(url)
    handle_response(reply)

    fields = reply.json()
    return Document(**fields)
async def aget_documents(self, uuids: List[str]) ‑> List[Document]
-
Asynchronously gets a list of documents.
Parameters
uuids
:List[str]
- The list of document uuids to get.
Returns
List[Document]
- The list of document objects.
Raises
APIError
- If the API response format is unexpected.
Expand source code
async def aget_documents(self, uuids: List[str]) -> List[Document]:
    """
    Asynchronously fetch several documents by uuid in one request.

    Parameters
    ----------
    uuids: List[str]
        The list of document uuids to get.

    Returns
    -------
    List[Document]
        One Document per entry of the JSON response.

    Raises
    ------
    ValueError
        If no async client is attached or ``uuids`` is empty.
    APIError
        If the API response format is unexpected.
    """
    aclient = self._aclient
    if not aclient:
        raise ValueError(
            "Can only get documents once a collection has been retrieved"
        )

    # Falsy covers both None and an empty list.
    if not uuids:
        raise ValueError("document uuids must be provided")

    # Large requests are still sent; the caller is only warned.
    if len(uuids) > LARGE_BATCH_WARNING_LIMIT:
        warnings.warn(LARGE_BATCH_WARNING, stacklevel=2)

    url = f"/collections/{urllib.parse.quote_plus(self.name)}/documents/list/get"
    reply = await aclient.post(url, json={"uuids": uuids})
    handle_response(reply)

    return [Document(**fields) for fields in reply.json()]
async def asearch(self, text: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, limit: Optional[int] = None, search_type: Optional[str] = None, mmr_lambda: Optional[float] = None) ‑> List[Document]
-
Async search over documents in a collection based on provided search criteria. One of text or metadata must be provided.
Returns an empty list if no documents are found.
Parameters
text
:Optional[str], optional
- The search text.
metadata
:Optional[Dict[str, Any]], optional
- Document metadata to filter on.
limit
:Optional[int], optional
- Limit the number of returned documents.
search_type
:Optional[str], optional
- The type of search to perform. Defaults to "similarity". Must be one of "similarity" or "mmr".
mmr_lambda
:Optional[float], optional
- The lambda parameter for the MMR Reranking Algorithm.
Returns
List[Document]
- The list of documents that match the search criteria.
Raises
APIError
- If the API response format is unexpected or there's an error from the API.
Expand source code
async def asearch(
    self,
    text: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    limit: Optional[int] = None,
    search_type: Optional[str] = None,
    mmr_lambda: Optional[float] = None,
) -> List[Document]:
    """
    Async search over documents in a collection based on provided search
    criteria. One of text or metadata must be provided.

    This delegates to ``asearch_return_query_vector`` and discards the
    query vector it returns.

    Parameters
    ----------
    text : Optional[str], optional
        The search text.
    metadata : Optional[Dict[str, Any]], optional
        Document metadata to filter on.
    limit : Optional[int], optional
        Limit the number of returned documents.
    search_type : Optional[str], optional
        The type of search to perform. Defaults to "similarity".
        Must be one of "similarity" or "mmr".
    mmr_lambda : Optional[float], optional
        The lambda parameter for the MMR Reranking Algorithm.

    Returns
    -------
    List[Document]
        The list of documents that match the search criteria; an empty
        list if no documents are found.

    Raises
    ------
    APIError
        If the API response format is unexpected or there's an error from
        the API.
    """
    search_args = {
        "text": text,
        "metadata": metadata,
        "limit": limit,
        "search_type": search_type,
        "mmr_lambda": mmr_lambda,
    }
    documents, _query_vector = await self.asearch_return_query_vector(
        **search_args
    )
    return documents
async def asearch_return_query_vector(self, text: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, limit: Optional[int] = None, search_type: Optional[str] = None, mmr_lambda: Optional[float] = None) ‑> Tuple[List[Document], List[float]]
-
Expand source code
async def asearch_return_query_vector(
    self,
    text: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    limit: Optional[int] = None,
    search_type: Optional[str] = None,
    mmr_lambda: Optional[float] = None,
) -> Tuple[List[Document], List[float]]:
    """
    Async search over documents, also returning the query vector.

    One of text or metadata must be provided.

    Parameters
    ----------
    text : Optional[str], optional
        The search text.
    metadata : Optional[Dict[str, Any]], optional
        Document metadata to filter on.
    limit : Optional[int], optional
        Limit the number of returned documents. Only sent to the API when
        greater than zero.
    search_type : Optional[str], optional
        The type of search to perform. Defaults to "similarity".
    mmr_lambda : Optional[float], optional
        The lambda parameter for the MMR Reranking Algorithm.

    Returns
    -------
    Tuple[List[Document], List[float]]
        The documents from the response's ``results`` field and the
        response's ``query_vector`` field. Two empty lists are returned
        when the API answers with status 404.

    Raises
    ------
    ValueError
        If no async client is attached, neither ``text`` nor ``metadata``
        is given, or ``text`` is not a string.
    """
    if not self._aclient:
        raise ValueError(
            "Can only search documents once a collection has been retrieved"
        )

    if text is None and metadata is None:
        raise ValueError("One of text or metadata must be provided.")

    if text is not None and not isinstance(text, str):
        raise ValueError("Text must be a string.")

    search_type_value = SearchType(search_type or "similarity")

    payload = DocumentSearchPayload(
        text=text,
        metadata=metadata,
        search_type=search_type_value,
        mmr_lambda=mmr_lambda,
    )

    url = f"/collections/{urllib.parse.quote_plus(self.name)}/search"
    params = {"limit": limit} if limit is not None and limit > 0 else {}
    response = await self._aclient.post(
        url,
        params=params,
        json=payload.model_dump(exclude_none=True, exclude_unset=True),
    )

    # If the collection is not found, return an empty list
    if response.status_code == 404:
        return [], []

    # Otherwise, handle the response for other errors
    handle_response(response)

    # Decode the body once instead of once per field.
    body = response.json()
    return (
        [Document(**document) for document in body["results"]],
        body["query_vector"],
    )
async def aupdate_document(self, uuid: str, document_id: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None) ‑> None
-
Asynchronously update document by UUID.
Parameters
uuid
:str
- The UUID of the document to update.
document_id
:Optional[str]
- The document_id of the document.
metadata
:Optional[Dict[str, Any]]
- The metadata of the document.
Returns
None
Raises
NotFoundError
- If the document is not found.
APIError
- If the API response format is unexpected.
Expand source code
async def aupdate_document(
    self,
    uuid: str,
    document_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Asynchronously patch a document's ``document_id`` and/or metadata.

    Parameters
    ----------
    uuid : str
        The UUID of the document to update.
    document_id : Optional[str]
        The document_id of the document.
    metadata : Optional[Dict[str, Any]]
        The metadata of the document.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If no async client is attached, ``uuid`` is None, or neither
        ``document_id`` nor ``metadata`` is given.
    NotFoundError
        If the document is not found.
    APIError
        If the API response format is unexpected.
    """
    aclient = self._aclient
    if not aclient:
        raise ValueError(
            "Can only update documents once a collection has been retrieved or"
            " created"
        )
    if uuid is None:
        raise ValueError("document uuid must be provided")

    nothing_to_update = document_id is None and metadata is None
    if nothing_to_update:
        raise ValueError("document_id or metadata must be provided")

    changes = filter_dict({"document_id": document_id, "metadata": metadata})
    url = f"/collections/{urllib.parse.quote_plus(self.name)}/documents/uuid/{uuid}"
    reply = await aclient.patch(url, json=changes)
    handle_response(reply)
def delete_document(self, uuid: str) ‑> None
-
Delete document.
Parameters
uuid
:str
- The uuid of the document to be deleted.
Returns
None
Raises
NotFoundError
- If the document is not found.
APIError
- If the API response format is unexpected.
Expand source code
def delete_document(self, uuid: str) -> None:
    """
    Delete a single document from this collection.

    Parameters
    ----------
    uuid: str
        The uuid of the document to be deleted.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If no client is attached or ``uuid`` is None or blank.
    NotFoundError
        If the document is not found.
    APIError
        If the API response format is unexpected.
    """
    client = self._client
    if not client:
        raise ValueError(
            "Can only delete a document once a collection has been retrieved"
        )

    uuid_missing = uuid is None or uuid.strip() == ""
    if uuid_missing:
        raise ValueError("document uuid must be provided")

    url = f"/collections/{urllib.parse.quote_plus(self.name)}/documents/uuid/{uuid}"
    reply = client.delete(url)
    handle_response(reply)
def get_document(self, uuid: str) ‑> Document
-
Gets a document.
Parameters
uuid
:str
- The uuid of the document to get.
Returns
Document
- The retrieved document.
Raises
NotFoundError
- If the document is not found.
APIError
- If the API response format is unexpected.
Expand source code
def get_document(self, uuid: str) -> Document:
    """
    Fetch a single document from this collection.

    Parameters
    ----------
    uuid: str
        The uuid of the document to get.

    Returns
    -------
    Document
        The retrieved document, built from the JSON response body.

    Raises
    ------
    ValueError
        If no client is attached or ``uuid`` is None or blank.
    NotFoundError
        If the document is not found.
    APIError
        If the API response format is unexpected.
    """
    client = self._client
    if not client:
        raise ValueError(
            "Can only get a document once a collection has been retrieved"
        )

    uuid_missing = uuid is None or uuid.strip() == ""
    if uuid_missing:
        raise ValueError("document uuid must be provided")

    url = f"/collections/{urllib.parse.quote_plus(self.name)}/documents/uuid/{uuid}"
    reply = client.get(url)
    handle_response(reply)

    fields = reply.json()
    return Document(**fields)
def get_documents(self, uuids: List[str]) ‑> List[Document]
-
Gets a list of documents.
Parameters
uuids
:List[str]
- The list of document uuids to get.
Returns
List[Document]
- The list of document objects.
Raises
APIError
- If the API response format is unexpected.
Expand source code
def get_documents(self, uuids: List[str]) -> List[Document]:
    """
    Fetch several documents from the collection in one request.

    Parameters
    ----------
    uuids: List[str]
        The list of document uuids to get.

    Returns
    -------
    List[Document]
        The list of document objects.

    Raises
    ------
    ValueError
        If the collection has no client attached or no uuids are given.
    APIError
        If the API response format is unexpected.
    """
    client = self._client
    if not client:
        raise ValueError(
            "Can only get documents once a collection has been retrieved"
        )

    # Covers both None and an empty list.
    if not uuids:
        raise ValueError("document uuids must be provided")

    # Large batches are allowed but flagged to the caller.
    if len(uuids) > LARGE_BATCH_WARNING_LIMIT:
        warnings.warn(LARGE_BATCH_WARNING, stacklevel=2)

    collection = urllib.parse.quote_plus(self.name)
    response = client.post(
        f"/collections/{collection}/documents/list/get",
        json={"uuids": uuids},
    )

    handle_response(response)

    documents = []
    for item in response.json():
        documents.append(Document(**item))
    return documents
def model_post_init(self: BaseModel, __context: Any) ‑> None
-
This function is meant to behave like a BaseModel method to initialise private attributes.
It takes context as an argument since that's what pydantic-core passes when calling it.
Args
self
- The BaseModel instance.
__context
- The context.
Expand source code
def init_private_attributes(self: BaseModel, __context: Any) -> None:
    """Initialise the private attributes of a model instance.

    Meant to behave like a BaseModel method. It accepts a context argument
    because that is what pydantic-core passes when calling it; the context
    itself is not used here.

    Args:
        self: The BaseModel instance.
        __context: The context.
    """
    # Nothing to do when the private-attribute store has already been set.
    if getattr(self, '__pydantic_private__', None) is not None:
        return

    private_values = {}
    for attr_name, attr in self.__private_attributes__.items():
        value = attr.get_default()
        # Attributes without a default are left out of the store.
        if value is not PydanticUndefined:
            private_values[attr_name] = value

    object_setattr(self, '__pydantic_private__', private_values)
def search(self, text: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, limit: Optional[int] = None, search_type: Optional[str] = None, mmr_lambda: Optional[float] = None) ‑> List[Document]
-
Searches over documents in a collection based on provided search criteria. One of text or metadata must be provided.
Returns an empty list if no documents are found.
Parameters
text
:Optional[str]
, optional- The search text.
metadata
:Optional[Dict[str, Any]]
, optional- Document metadata to filter on.
limit
:Optional[int]
, optional- Limit the number of returned documents.
search_type
:Optional[str]
, optional- The type of search to perform. Defaults to "similarity". Must be one of "similarity" or "mmr".
mmr_lambda
:Optional[float]
, optional- The lambda parameter for the MMR Reranking Algorithm.
Returns
List[Document]
- The list of documents that match the search criteria.
Raises
APIError
- If the API response format is unexpected or there's an error from the API.
Expand source code
def search(
    self,
    text: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    limit: Optional[int] = None,
    search_type: Optional[str] = None,
    mmr_lambda: Optional[float] = None,
) -> List[Document]:
    """
    Search the collection's documents and return only the matches.

    One of text or metadata must be provided. Returns an empty list if
    no documents are found. This delegates to
    ``search_return_query_vector`` and drops the query vector.

    Parameters
    ----------
    text : Optional[str], optional
        The search text.
    metadata : Optional[Dict[str, Any]], optional
        Document metadata to filter on.
    limit : Optional[int], optional
        Limit the number of returned documents.
    search_type : Optional[str], optional
        The type of search to perform. Defaults to "similarity".
        Must be one of "similarity" or "mmr".
    mmr_lambda : Optional[float], optional
        The lambda parameter for the MMR Reranking Algorithm.

    Returns
    -------
    List[Document]
        The list of documents that match the search criteria.

    Raises
    ------
    APIError
        If the API response format is unexpected or there's an error from
        the API.
    """
    search_kwargs = {
        "text": text,
        "metadata": metadata,
        "limit": limit,
        "search_type": search_type,
        "mmr_lambda": mmr_lambda,
    }
    # The second tuple element is the query vector, which is not needed here.
    documents, _query_vector = self.search_return_query_vector(**search_kwargs)
    return documents
def search_return_query_vector(self, text: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, limit: Optional[int] = None, search_type: Optional[str] = None, mmr_lambda: Optional[float] = None) ‑> Tuple[List[Document], List[float]]
-
Expand source code
def search_return_query_vector(
    self,
    text: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    limit: Optional[int] = None,
    search_type: Optional[str] = None,
    mmr_lambda: Optional[float] = None,
) -> Tuple[List[Document], List[float]]:
    """
    Searches over documents in a collection and also returns the query vector.

    One of text or metadata must be provided. Returns a pair of empty lists
    if the API answers with a 404 status.

    Parameters
    ----------
    text : Optional[str], optional
        The search text.
    metadata : Optional[Dict[str, Any]], optional
        Document metadata to filter on.
    limit : Optional[int], optional
        Limit the number of returned documents. Only sent to the API when
        it is a positive number.
    search_type : Optional[str], optional
        The type of search to perform. Defaults to "similarity".
    mmr_lambda : Optional[float], optional
        The lambda parameter for the MMR Reranking Algorithm.

    Returns
    -------
    Tuple[List[Document], List[float]]
        The documents built from the response's "results" entry and the
        value of its "query_vector" entry.

    Raises
    ------
    ValueError
        If the collection has no client attached, if neither text nor
        metadata is provided, or if text is not a string.
    APIError
        If the API response format is unexpected or there's an error from
        the API.
    """
    if not self._client:
        raise ValueError(
            "Can only search documents once a collection has been retrieved"
        )

    if text is None and metadata is None:
        raise ValueError("One of text or metadata must be provided.")

    if text is not None and not isinstance(text, str):
        raise ValueError("Text must be a string.")

    search_type_value = SearchType(search_type or "similarity")

    payload = DocumentSearchPayload(
        text=text,
        metadata=metadata,
        search_type=search_type_value,
        mmr_lambda=mmr_lambda,
    )

    url = f"/collections/{urllib.parse.quote_plus(self.name)}/search"
    params = {"limit": limit} if limit is not None and limit > 0 else {}
    response = self._client.post(
        url,
        params=params,
        json=payload.model_dump(exclude_none=True, exclude_unset=True),
    )

    # If the collection is not found, return an empty list
    if response.status_code == 404:
        return [], []

    # Otherwise, handle the response for other errors
    handle_response(response)

    # Decode the body once and read both entries from the same object.
    body = response.json()
    return (
        [Document(**document) for document in body["results"]],
        body["query_vector"],
    )
def update_document(self, uuid: str, document_id: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None) ‑> None
-
Update document by UUID.
Parameters
uuid
:str
- The UUID of the document to update.
document_id
:Optional[str]
- The document_id of the document.
metadata
:Optional[Dict[str, Any]]
- The metadata of the document.
Returns
None
Raises
NotFoundError
- If the document is not found.
APIError
- If the API response format is unexpected.
Expand source code
def update_document(
    self,
    uuid: str,
    document_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Update document by UUID.

    Parameters
    ----------
    uuid : str
        The UUID of the document to update.
    document_id : Optional[str]
        The document_id of the document.
    metadata : Optional[Dict[str, Any]]
        The metadata of the document.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the collection has no client attached, if the uuid is missing
        or blank, or if neither document_id nor metadata is given.
    NotFoundError
        If the document is not found.
    APIError
        If the API response format is unexpected.
    """
    if not self._client:
        raise ValueError(
            "Can only update documents once a collection has been retrieved or"
            " created"
        )

    # Reject blank uuids as well as None (same check as delete_document and
    # get_document); a blank uuid would otherwise produce a malformed URL.
    if uuid is None or uuid.strip() == "":
        raise ValueError("document uuid must be provided")

    if document_id is None and metadata is None:
        raise ValueError("document_id or metadata must be provided")

    # Send only the fields the caller actually supplied.
    payload = filter_dict({"document_id": document_id, "metadata": metadata})

    response = self._client.patch(
        f"/collections/{urllib.parse.quote_plus(self.name)}/documents/uuid/{uuid}",
        json=payload,
    )

    handle_response(response)
Inherited members