Skip to content

langchain-pinecone

PyPI - Version PyPI - License PyPI - Downloads

langchain_pinecone

PineconeEmbeddings

Bases: BaseModel, Embeddings

PineconeEmbeddings embedding model.

Example
from langchain_pinecone import PineconeEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_core.documents import Document

# Initialize embeddings with a specific model
embeddings = PineconeEmbeddings(model="multilingual-e5-large")

# Embed a single query
query_embedding = embeddings.embed_query("What is machine learning?")

# Embed multiple documents
docs = ["Document 1 content", "Document 2 content"]
doc_embeddings = embeddings.embed_documents(docs)

# Use with PineconeVectorStore
from pinecone import Pinecone

pc = Pinecone(api_key="your-api-key")
index = pc.Index("your-index-name")

vectorstore = PineconeVectorStore(
    index=index,
    embedding=embeddings
)

# Add documents to vector store
vectorstore.add_documents([
    Document(page_content="Hello, world!"),
    Document(page_content="This is a test.")
])

# Search for similar documents
results = vectorstore.similarity_search("hello", k=2)
METHOD DESCRIPTION
set_default_config

Set default configuration based on model.

list_supported_models

Return a list of supported embedding models from Pinecone.

alist_supported_models

Return a list of supported embedding models from Pinecone asynchronously.

validate_model_supported

Validate that the provided model is supported by Pinecone.

validate_environment

Validate that Pinecone version and credentials exist in environment.

embed_documents

Embed search docs.

aembed_documents

Asynchronously embed search docs.

embed_query

Embed query text.

aembed_query

Asynchronously embed query text.

batch_size class-attribute instance-attribute

batch_size: int | None = None

Batch size for embedding documents.

query_params class-attribute instance-attribute

query_params: dict = Field(default_factory=dict)

Parameters for embedding query.

document_params class-attribute instance-attribute

document_params: dict = Field(default_factory=dict)

Parameters for embedding document.

pinecone_api_key class-attribute instance-attribute

pinecone_api_key: SecretStr = Field(
    default_factory=secret_from_env(
        "PINECONE_API_KEY",
        error_message="Pinecone API key not found. Please set the PINECONE_API_KEY environment variable or pass it via `pinecone_api_key`.",
    ),
    alias="pinecone_api_key",
    validation_alias=AliasChoices("pinecone_api_key", "api_key"),
)

Pinecone API key.

If not provided, will look for the PINECONE_API_KEY environment variable.

async_client property

async_client: PineconeAsyncio

Lazily initialize the async client.

set_default_config classmethod

set_default_config(values: dict) -> Any

Set default configuration based on model.

list_supported_models

list_supported_models(vector_type: str | None = None) -> list

Return a list of supported embedding models from Pinecone.

alist_supported_models async

alist_supported_models(vector_type: str | None = None) -> list

Return a list of supported embedding models from Pinecone asynchronously.

validate_model_supported

validate_model_supported() -> Self

Validate that the provided model is supported by Pinecone.

validate_environment

validate_environment() -> Self

Validate that Pinecone version and credentials exist in environment.

embed_documents

embed_documents(texts: list[str]) -> list[list[float]]

Embed search docs.

aembed_documents async

aembed_documents(texts: list[str]) -> list[list[float]]

Asynchronously embed search docs.

PARAMETER DESCRIPTION
texts

List of text to embed.

TYPE: list[str]

RETURNS DESCRIPTION
list[list[float]]

List of embeddings.

embed_query

embed_query(text: str) -> list[float]

Embed query text.

aembed_query async

aembed_query(text: str) -> list[float]

Asynchronously embed query text.

PineconeSparseEmbeddings

Bases: PineconeEmbeddings

PineconeSparseEmbeddings embedding model.

Example
from langchain_pinecone import PineconeSparseEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_core.documents import Document

# Initialize sparse embeddings
sparse_embeddings = PineconeSparseEmbeddings(model="pinecone-sparse-english-v0")

# Embed a single query (returns SparseValues)
query_embedding = sparse_embeddings.embed_query("What is machine learning?")
# query_embedding contains SparseValues with indices and values

# Embed multiple documents
docs = ["Document 1 content", "Document 2 content"]
doc_embeddings = sparse_embeddings.embed_documents(docs)

# Use with an index configured for sparse vectors
from pinecone import Pinecone

pc = Pinecone(api_key="your-api-key")

# Create index with sparse embeddings support
if not pc.has_index("sparse-index"):
    pc.create_index_for_model(
        name="sparse-index",
        cloud="aws",
        region="us-east-1",
        embed={
            "model": "pinecone-sparse-english-v0",
            "field_map": {"text": "chunk_text"},
            "metric": "dotproduct",
            "read_parameters": {},
            "write_parameters": {}
        }
    )

index = pc.Index("sparse-index")

# IMPORTANT: Use PineconeSparseVectorStore for sparse vectors
# The regular PineconeVectorStore won't work with sparse embeddings
from langchain_pinecone.vectorstores_sparse import PineconeSparseVectorStore

# Initialize sparse vector store with sparse embeddings
vector_store = PineconeSparseVectorStore(
    index=index,
    embedding=sparse_embeddings
)

# Add documents
from uuid import uuid4

documents = [
    Document(page_content="Machine learning is awesome", metadata={"source": "article"}),
    Document(page_content="Neural networks power modern AI", metadata={"source": "book"})
]

# Generate unique IDs for each document
uuids = [str(uuid4()) for _ in range(len(documents))]

# Add documents to the vector store
vector_store.add_documents(documents=documents, ids=uuids)

# Search for similar documents
results = vector_store.similarity_search("machine learning", k=2)
METHOD DESCRIPTION
list_supported_models

Return a list of supported embedding models from Pinecone.

alist_supported_models

Return a list of supported embedding models from Pinecone asynchronously.

validate_model_supported

Validate that the provided model is supported by Pinecone.

validate_environment

Validate that Pinecone version and credentials exist in environment.

set_default_config

Set default configuration based on model.

embed_documents

Embed search docs with sparse embeddings.

aembed_documents

Asynchronously embed search docs with sparse embeddings.

embed_query

Embed query text with sparse embeddings.

aembed_query

Asynchronously embed query text with sparse embeddings.

batch_size class-attribute instance-attribute

batch_size: int | None = None

Batch size for embedding documents.

query_params class-attribute instance-attribute

query_params: dict = Field(default_factory=dict)

Parameters for embedding query.

document_params class-attribute instance-attribute

document_params: dict = Field(default_factory=dict)

Parameters for embedding document.

pinecone_api_key class-attribute instance-attribute

pinecone_api_key: SecretStr = Field(
    default_factory=secret_from_env(
        "PINECONE_API_KEY",
        error_message="Pinecone API key not found. Please set the PINECONE_API_KEY environment variable or pass it via `pinecone_api_key`.",
    ),
    alias="pinecone_api_key",
    validation_alias=AliasChoices("pinecone_api_key", "api_key"),
)

Pinecone API key.

If not provided, will look for the PINECONE_API_KEY environment variable.

async_client property

async_client: PineconeAsyncio

Lazily initialize the async client.

list_supported_models

list_supported_models(vector_type: str | None = None) -> list

Return a list of supported embedding models from Pinecone.

alist_supported_models async

alist_supported_models(vector_type: str | None = None) -> list

Return a list of supported embedding models from Pinecone asynchronously.

validate_model_supported

validate_model_supported() -> Self

Validate that the provided model is supported by Pinecone.

validate_environment

validate_environment() -> Self

Validate that Pinecone version and credentials exist in environment.

set_default_config classmethod

set_default_config(values: dict) -> Any

Set default configuration based on model.

embed_documents

embed_documents(texts: list[str]) -> list[SparseValues]

Embed search docs with sparse embeddings.

aembed_documents async

aembed_documents(texts: list[str]) -> list[SparseValues]

Asynchronously embed search docs with sparse embeddings.

embed_query

embed_query(text: str) -> SparseValues

Embed query text with sparse embeddings.

aembed_query async

aembed_query(text: str) -> SparseValues

Asynchronously embed query text with sparse embeddings.

PineconeRerank

Bases: BaseDocumentCompressor

Document compressor that uses Pinecone Rerank API.

METHOD DESCRIPTION
validate_model_supported

Validate that the provided model is supported by Pinecone for reranking.

list_supported_models

Return a list of supported reranker models from Pinecone.

alist_supported_models

Return a list of supported reranker models from Pinecone asynchronously.

rerank

Return a list of documents ordered by their relevance to the provided query.

arerank

Async rerank documents using Pinecone's reranking API.

compress_documents

Compress documents using Pinecone's rerank API.

acompress_documents

Async compress documents using Pinecone's rerank API.

client class-attribute instance-attribute

client: Pinecone | None = None

Pinecone client to use for compressing documents.

async_client class-attribute instance-attribute

async_client: PineconeAsyncio | None = None

Async Pinecone client to use for compressing documents.

top_n class-attribute instance-attribute

top_n: int | None = 3

Number of documents to return.

model class-attribute instance-attribute

model: str = Field(
    default="bge-reranker-v2-m3",
    description="Model to use for reranking. Default is 'bge-reranker-v2-m3'.",
)

Model to use for reranking.

pinecone_api_key class-attribute instance-attribute

pinecone_api_key: SecretStr = Field(
    default_factory=secret_from_env(
        "PINECONE_API_KEY",
        error_message="Pinecone API key not found. Please set the PINECONE_API_KEY environment variable or pass it via `pinecone_api_key`.",
    ),
    alias="pinecone_api_key",
    validation_alias=AliasChoices("pinecone_api_key", "api_key"),
)

Pinecone API key.

If not provided, will look for the PINECONE_API_KEY environment variable.

rank_fields class-attribute instance-attribute

rank_fields: list[str] | None = None

Fields to use for reranking when documents are dictionaries.

return_documents class-attribute instance-attribute

return_documents: bool = True

Whether to return the documents in the reranking results.

validate_model_supported

validate_model_supported() -> Any

Validate that the provided model is supported by Pinecone for reranking.

list_supported_models

list_supported_models(vector_type: str | None = None) -> list

Return a list of supported reranker models from Pinecone.

alist_supported_models async

alist_supported_models(vector_type: str | None = None) -> list

Return a list of supported reranker models from Pinecone asynchronously.

rerank

rerank(
    documents: Sequence[str | Document | dict],
    query: str,
    *,
    rank_fields: list[str] | None = None,
    model: str | None = None,
    top_n: int | None = None,
    truncate: str = "END",
) -> list[dict[str, Any]]

Return a list of documents ordered by their relevance to the provided query.

arerank async

arerank(
    documents: Sequence[str | Document | dict],
    query: str,
    *,
    rank_fields: list[str] | None = None,
    model: str | None = None,
    top_n: int | None = None,
    truncate: str = "END",
) -> list[dict[str, Any]]

Async rerank documents using Pinecone's reranking API.

compress_documents

compress_documents(
    documents: Sequence[Document], query: str, callbacks: Callbacks | None = None
) -> Sequence[Document]

Compress documents using Pinecone's rerank API.

acompress_documents async

acompress_documents(
    documents: Sequence[Document], query: str, callbacks: Callbacks | None = None
) -> Sequence[Document]

Async compress documents using Pinecone's rerank API.

Pinecone

Bases: PineconeVectorStore

Deprecated. Use PineconeVectorStore instead.

METHOD DESCRIPTION
add_texts

Run more texts through the embeddings and add to the vectorstore.

delete

Delete by vector IDs or filter.

get_by_ids

Get documents by their IDs.

aget_by_ids

Async get documents by their IDs.

adelete

Async delete by vector ID or other criteria.

aadd_texts

Asynchronously run more texts through the embeddings and add to the vectorstore.

add_documents

Add or update documents in the VectorStore.

aadd_documents

Async run more documents through the embeddings and add to the VectorStore.

search

Return docs most similar to query using a specified search type.

asearch

Async return docs most similar to query using a specified search type.

similarity_search

Return pinecone documents most similar to query.

similarity_search_with_score

Return pinecone documents most similar to query, along with scores.

asimilarity_search_with_score

Asynchronously return pinecone documents most similar to query, along with scores.

similarity_search_with_relevance_scores

Return docs and relevance scores in the range [0, 1].

asimilarity_search_with_relevance_scores

Async return docs and relevance scores in the range [0, 1].

asimilarity_search

Async return docs most similar to query.

similarity_search_by_vector

Return documents most similar to the given embedding vector.

asimilarity_search_by_vector

Return documents most similar to the given embedding vector asynchronously.

max_marginal_relevance_search

Return docs selected using the maximal marginal relevance.

amax_marginal_relevance_search

Async return docs selected using the maximal marginal relevance.

max_marginal_relevance_search_by_vector

Return docs selected using the maximal marginal relevance.

amax_marginal_relevance_search_by_vector

Return docs selected using the maximal marginal relevance asynchronously.

from_documents

Return VectorStore initialized from documents and embeddings.

afrom_documents

Async return VectorStore initialized from documents and embeddings.

from_texts

Construct Pinecone wrapper from raw documents.

afrom_texts

Async return VectorStore initialized from texts and embeddings.

as_retriever

Return VectorStoreRetriever initialized from this VectorStore.

similarity_search_by_vector_with_score

Return pinecone documents most similar to embedding, along with scores.

asimilarity_search_by_vector_with_score

Return pinecone documents most similar to embedding, along with scores asynchronously.

get_pinecone_index

Return a Pinecone Index instance.

from_existing_index

Load pinecone vectorstore from index name.

embeddings property

embeddings: Embeddings | None

Access the query embedding object if available.

index property

index: _Index

Get synchronous index instance.

async_index async property

async_index: _IndexAsyncio

Get asynchronous index instance.

add_texts

add_texts(
    texts: Iterable[str],
    metadatas: list[dict] | None = None,
    ids: list[str] | None = None,
    namespace: str | None = None,
    batch_size: int = 32,
    embedding_chunk_size: int = 1000,
    *,
    async_req: bool = True,
    id_prefix: str | None = None,
    **kwargs: Any,
) -> list[str]

Run more texts through the embeddings and add to the vectorstore.

Upsert optimization is done by chunking the embeddings and upserting them. This is done to avoid memory issues and to optimize the use of HTTP-based embeddings. For OpenAI embeddings, use pool_threads>4 when constructing the pinecone.Index, embedding_chunk_size>1000 and batch_size~64 for best performance.

Args:
  texts: Iterable of strings to add to the vectorstore.
  metadatas: Optional list of metadatas associated with the texts.
  ids: Optional list of ids to associate with the texts.
  namespace: Optional pinecone namespace to add the texts to.
  batch_size: Batch size to use when adding the texts to the vectorstore.
  embedding_chunk_size: Chunk size to use when embedding the texts.
  async_req: Whether to run asynchronously. Defaults to True.
  id_prefix: Optional string to use as an ID prefix when upserting vectors.

RETURNS DESCRIPTION
list[str]

List of ids from adding the texts into the vectorstore.

delete

delete(
    ids: list[str] | None = None,
    delete_all: bool | None = None,
    namespace: str | None = None,
    filter: dict | None = None,
    **kwargs: Any,
) -> None

Delete by vector IDs or filter.

Args:
  ids: List of ids to delete.
  delete_all: Whether to delete all vectors in the index.
  filter: Dictionary of conditions to filter vectors to delete.
  namespace: Namespace to delete from. Defaults to the '' namespace.

get_by_ids

get_by_ids(ids: Sequence[str]) -> list[Document]

Get documents by their IDs.

The returned documents are expected to have the ID field set to the ID of the document in the vector store.

Fewer documents may be returned than requested if some IDs are not found or if there are duplicated IDs.

Users should not assume that the order of the returned documents matches the order of the input IDs. Instead, users should rely on the ID field of the returned documents.

This method should NOT raise exceptions if no documents are found for some IDs.

PARAMETER DESCRIPTION
ids

List of IDs to retrieve.

TYPE: Sequence[str]

RETURNS DESCRIPTION
list[Document]

List of Document objects.

aget_by_ids async

aget_by_ids(ids: Sequence[str]) -> list[Document]

Async get documents by their IDs.

The returned documents are expected to have the ID field set to the ID of the document in the vector store.

Fewer documents may be returned than requested if some IDs are not found or if there are duplicated IDs.

Users should not assume that the order of the returned documents matches the order of the input IDs. Instead, users should rely on the ID field of the returned documents.

This method should NOT raise exceptions if no documents are found for some IDs.

PARAMETER DESCRIPTION
ids

List of IDs to retrieve.

TYPE: Sequence[str]

RETURNS DESCRIPTION
list[Document]

List of Document objects.

adelete async

adelete(
    ids: list[str] | None = None,
    delete_all: bool | None = None,
    namespace: str | None = None,
    filter: dict | None = None,
    **kwargs: Any,
) -> None

Async delete by vector ID or other criteria.

PARAMETER DESCRIPTION
ids

List of IDs to delete. If None, delete all.

TYPE: list[str] | None DEFAULT: None

**kwargs

Other keyword arguments that subclasses might use.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
bool | None

True if deletion is successful, False otherwise, None if not implemented.

aadd_texts async

aadd_texts(
    texts: Iterable[str],
    metadatas: list[dict] | None = None,
    ids: list[str] | None = None,
    namespace: str | None = None,
    batch_size: int = 32,
    embedding_chunk_size: int = 1000,
    *,
    id_prefix: str | None = None,
    **kwargs: Any,
) -> list[str]

Asynchronously run more texts through the embeddings and add to the vectorstore.

Upsert optimization is done by chunking the embeddings and upserting them. This is done to avoid memory issues and to optimize the use of HTTP-based embeddings. For OpenAI embeddings, use pool_threads>4 when constructing the pinecone.Index, embedding_chunk_size>1000 and batch_size~64 for best performance.

Args:
  texts: Iterable of strings to add to the vectorstore.
  metadatas: Optional list of metadatas associated with the texts.
  ids: Optional list of ids to associate with the texts.
  namespace: Optional pinecone namespace to add the texts to.
  batch_size: Batch size to use when adding the texts to the vectorstore.
  embedding_chunk_size: Chunk size to use when embedding the texts.
  id_prefix: Optional string to use as an ID prefix when upserting vectors.

RETURNS DESCRIPTION
list[str]

List of ids from adding the texts into the vectorstore.

add_documents

add_documents(documents: list[Document], **kwargs: Any) -> list[str]

Add or update documents in the VectorStore.

PARAMETER DESCRIPTION
documents

Documents to add to the VectorStore.

TYPE: list[Document]

**kwargs

Additional keyword arguments.

If kwargs contains IDs and documents contain ids, the IDs in the kwargs will receive precedence.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[str]

List of IDs of the added texts.

aadd_documents async

aadd_documents(documents: list[Document], **kwargs: Any) -> list[str]

Async run more documents through the embeddings and add to the VectorStore.

PARAMETER DESCRIPTION
documents

Documents to add to the VectorStore.

TYPE: list[Document]

**kwargs

Additional keyword arguments.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[str]

List of IDs of the added texts.

search

search(query: str, search_type: str, **kwargs: Any) -> list[Document]

Return docs most similar to query using a specified search type.

PARAMETER DESCRIPTION
query

Input text.

TYPE: str

search_type

Type of search to perform.

Can be 'similarity', 'mmr', or 'similarity_score_threshold'.

TYPE: str

**kwargs

Arguments to pass to the search method.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[Document]

List of Document objects most similar to the query.

RAISES DESCRIPTION
ValueError

If search_type is not one of 'similarity', 'mmr', or 'similarity_score_threshold'.

asearch async

asearch(query: str, search_type: str, **kwargs: Any) -> list[Document]

Async return docs most similar to query using a specified search type.

PARAMETER DESCRIPTION
query

Input text.

TYPE: str

search_type

Type of search to perform.

Can be 'similarity', 'mmr', or 'similarity_score_threshold'.

TYPE: str

**kwargs

Arguments to pass to the search method.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[Document]

List of Document objects most similar to the query.

RAISES DESCRIPTION
ValueError

If search_type is not one of 'similarity', 'mmr', or 'similarity_score_threshold'.

similarity_search

similarity_search(
    query: str,
    k: int = 4,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[Document]

Return pinecone documents most similar to query.

PARAMETER DESCRIPTION
query

Text to look up documents similar to.

TYPE: str

k

Number of Documents to return. Defaults to 4.

TYPE: int DEFAULT: 4

filter

Dictionary of argument(s) to filter on metadata

TYPE: dict | None DEFAULT: None

namespace

Namespace to search in. Default will search in '' namespace.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
list[Document]

List of Documents most similar to the query.

similarity_search_with_score

similarity_search_with_score(
    query: str,
    k: int = 4,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[tuple[Document, float]]

Return pinecone documents most similar to query, along with scores.

PARAMETER DESCRIPTION
query

Text to look up documents similar to.

TYPE: str

k

Number of Documents to return. Defaults to 4.

TYPE: int DEFAULT: 4

filter

Dictionary of argument(s) to filter on metadata

TYPE: dict | None DEFAULT: None

namespace

Namespace to search in. Default will search in '' namespace.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
list[tuple[Document, float]]

List of Documents most similar to the query and score for each

asimilarity_search_with_score async

asimilarity_search_with_score(
    query: str,
    k: int = 4,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[tuple[Document, float]]

Asynchronously return pinecone documents most similar to query, along with scores.

PARAMETER DESCRIPTION
query

Text to look up documents similar to.

TYPE: str

k

Number of Documents to return. Defaults to 4.

TYPE: int DEFAULT: 4

filter

Dictionary of argument(s) to filter on metadata

TYPE: dict | None DEFAULT: None

namespace

Namespace to search in. Default will search in '' namespace.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
list[tuple[Document, float]]

List of Documents most similar to the query and score for each

similarity_search_with_relevance_scores

similarity_search_with_relevance_scores(
    query: str, k: int = 4, **kwargs: Any
) -> list[tuple[Document, float]]

Return docs and relevance scores in the range [0, 1].

0 is dissimilar, 1 is most similar.

PARAMETER DESCRIPTION
query

Input text.

TYPE: str

k

Number of Document objects to return.

TYPE: int DEFAULT: 4

**kwargs

Kwargs to be passed to similarity search.

Should include score_threshold, an optional floating point value between 0 to 1 to filter the resulting set of retrieved docs.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[tuple[Document, float]]

List of tuples of (doc, similarity_score).

asimilarity_search_with_relevance_scores async

asimilarity_search_with_relevance_scores(
    query: str, k: int = 4, **kwargs: Any
) -> list[tuple[Document, float]]

Async return docs and relevance scores in the range [0, 1].

0 is dissimilar, 1 is most similar.

PARAMETER DESCRIPTION
query

Input text.

TYPE: str

k

Number of Document objects to return.

TYPE: int DEFAULT: 4

**kwargs

Kwargs to be passed to similarity search.

Should include score_threshold, an optional floating point value between 0 to 1 to filter the resulting set of retrieved docs.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[tuple[Document, float]]

List of tuples of (doc, similarity_score)

asimilarity_search async

asimilarity_search(
    query: str,
    k: int = 4,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[Document]

Async return docs most similar to query.

PARAMETER DESCRIPTION
query

Input text.

TYPE: str

k

Number of Document objects to return.

TYPE: int DEFAULT: 4

**kwargs

Arguments to pass to the search method.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[Document]

List of Document objects most similar to the query.

similarity_search_by_vector

similarity_search_by_vector(
    embedding: list[float], k: int = 4, **kwargs: Any
) -> list[Document]

Return documents most similar to the given embedding vector.

Wraps similarity_search_by_vector_with_score but strips the scores.

asimilarity_search_by_vector async

asimilarity_search_by_vector(
    embedding: list[float], k: int = 4, **kwargs: Any
) -> list[Document]

Return documents most similar to the given embedding vector asynchronously.

Wraps asimilarity_search_by_vector_with_score but strips the scores.

max_marginal_relevance_search

max_marginal_relevance_search(
    query: str,
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[Document]

Return docs selected using the maximal marginal relevance.

Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents.

PARAMETER DESCRIPTION
query

Text to look up documents similar to.

TYPE: str

k

Number of Documents to return. Defaults to 4.

TYPE: int DEFAULT: 4

fetch_k

Number of Documents to fetch to pass to MMR algorithm.

TYPE: int DEFAULT: 20

lambda_mult

Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.

TYPE: float DEFAULT: 0.5

filter

Dictionary of argument(s) to filter on metadata

TYPE: dict | None DEFAULT: None

namespace

Namespace to search in. Default will search in '' namespace.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
list[Document]

List of Documents selected by maximal marginal relevance.

amax_marginal_relevance_search async

amax_marginal_relevance_search(
    query: str,
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[Document]

Async return docs selected using the maximal marginal relevance.

Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents.

PARAMETER DESCRIPTION
query

Text to look up documents similar to.

TYPE: str

k

Number of Document objects to return.

TYPE: int DEFAULT: 4

fetch_k

Number of Document objects to fetch to pass to MMR algorithm.

TYPE: int DEFAULT: 20

lambda_mult

Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity.

TYPE: float DEFAULT: 0.5

**kwargs

Arguments to pass to the search method.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[Document]

List of Document objects selected by maximal marginal relevance.

max_marginal_relevance_search_by_vector

max_marginal_relevance_search_by_vector(
    embedding: list[float],
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[Document]

Return docs selected using the maximal marginal relevance.

Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents.

PARAMETER DESCRIPTION
embedding

Embedding to look up documents similar to.

TYPE: list[float]

k

Number of Documents to return. Defaults to 4.

TYPE: int DEFAULT: 4

fetch_k

Number of Documents to fetch to pass to MMR algorithm.

TYPE: int DEFAULT: 20

lambda_mult

Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.

TYPE: float DEFAULT: 0.5

filter

Dictionary of argument(s) to filter on metadata

TYPE: dict | None DEFAULT: None

namespace

Namespace to search in. Default will search in '' namespace.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
list[Document]

List of Documents selected by maximal marginal relevance.

amax_marginal_relevance_search_by_vector async

amax_marginal_relevance_search_by_vector(
    embedding: list[float],
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[Document]

Return docs selected using the maximal marginal relevance asynchronously.

Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents.

PARAMETER DESCRIPTION
embedding

Embedding to look up documents similar to.

TYPE: list[float]

k

Number of Documents to return. Defaults to 4.

TYPE: int DEFAULT: 4

fetch_k

Number of Documents to fetch to pass to MMR algorithm.

TYPE: int DEFAULT: 20

lambda_mult

Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.

TYPE: float DEFAULT: 0.5

filter

Dictionary of argument(s) to filter on metadata

TYPE: dict | None DEFAULT: None

namespace

Namespace to search in. Default will search in '' namespace.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
list[Document]

List of Documents selected by maximal marginal relevance.

from_documents classmethod

from_documents(documents: list[Document], embedding: Embeddings, **kwargs: Any) -> Self

Return VectorStore initialized from documents and embeddings.

PARAMETER DESCRIPTION
documents

List of Document objects to add to the VectorStore.

TYPE: list[Document]

embedding

Embedding function to use.

TYPE: Embeddings

**kwargs

Additional keyword arguments.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
Self

VectorStore initialized from documents and embeddings.

afrom_documents async classmethod

afrom_documents(
    documents: list[Document], embedding: Embeddings, **kwargs: Any
) -> Self

Async return VectorStore initialized from documents and embeddings.

PARAMETER DESCRIPTION
documents

List of Document objects to add to the VectorStore.

TYPE: list[Document]

embedding

Embedding function to use.

TYPE: Embeddings

**kwargs

Additional keyword arguments.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
Self

VectorStore initialized from documents and embeddings.

from_texts classmethod

from_texts(
    texts: list[str],
    embedding: Embeddings,
    metadatas: list[dict] | None = None,
    ids: list[str] | None = None,
    batch_size: int = 32,
    text_key: str = "text",
    namespace: str | None = None,
    index_name: str | None = None,
    upsert_kwargs: dict | None = None,
    pool_threads: int = 4,
    embeddings_chunk_size: int = 1000,
    async_req: bool = True,
    *,
    id_prefix: str | None = None,
    **kwargs: Any,
) -> PineconeVectorStore

Construct Pinecone wrapper from raw documents.

This is a user-friendly interface that
  1. Embeds documents.
  2. Adds the documents to a provided Pinecone index

This is intended to be a quick way to get started.

The pool_threads affects the speed of the upsert operations.

Setup: set the PINECONE_API_KEY environment variable to your Pinecone API key.

Example
from langchain_pinecone import PineconeVectorStore, PineconeEmbeddings

embeddings = PineconeEmbeddings(model="multilingual-e5-large")

index_name = "my-index"
vectorstore = PineconeVectorStore.from_texts(
    texts,
    index_name=index_name,
    embedding=embeddings,
    namespace=namespace,
)

afrom_texts async classmethod

afrom_texts(
    texts: list[str],
    embedding: Embeddings,
    metadatas: list[dict] | None = None,
    ids: list[str] | None = None,
    batch_size: int = 32,
    text_key: str = "text",
    namespace: str | None = None,
    index_name: str | None = None,
    upsert_kwargs: dict | None = None,
    embeddings_chunk_size: int = 1000,
    *,
    id_prefix: str | None = None,
    **kwargs: Any,
) -> PineconeVectorStore

Async return VectorStore initialized from texts and embeddings.

PARAMETER DESCRIPTION
texts

Texts to add to the VectorStore.

TYPE: list[str]

embedding

Embedding function to use.

TYPE: Embeddings

metadatas

Optional list of metadatas associated with the texts.

TYPE: list[dict] | None DEFAULT: None

ids

Optional list of IDs associated with the texts.

TYPE: list[str] | None DEFAULT: None

**kwargs

Additional keyword arguments.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
Self

VectorStore initialized from texts and embeddings.

as_retriever

as_retriever(**kwargs: Any) -> VectorStoreRetriever

Return VectorStoreRetriever initialized from this VectorStore.

PARAMETER DESCRIPTION
**kwargs

Keyword arguments to pass to the search function.

Can include:

  • search_type: Defines the type of search that the Retriever should perform. Can be 'similarity' (default), 'mmr', or 'similarity_score_threshold'.
  • search_kwargs: Keyword arguments to pass to the search function.

    Can include things like:

    • k: Amount of documents to return (Default: 4)
    • score_threshold: Minimum relevance threshold for similarity_score_threshold
    • fetch_k: Amount of documents to pass to MMR algorithm (Default: 20)
    • lambda_mult: Diversity of results returned by MMR; 1 for minimum diversity and 0 for maximum. (Default: 0.5)
    • filter: Filter by document metadata

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
VectorStoreRetriever

Retriever class for VectorStore.

Examples:

# Retrieve more documents with higher diversity
# Useful if your dataset has many similar documents
docsearch.as_retriever(
    search_type="mmr", search_kwargs={"k": 6, "lambda_mult": 0.25}
)

# Fetch more documents for the MMR algorithm to consider
# But only return the top 5
docsearch.as_retriever(search_type="mmr", search_kwargs={"k": 5, "fetch_k": 50})

# Only retrieve documents that have a relevance score
# Above a certain threshold
docsearch.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.8},
)

# Only get the single most similar document from the dataset
docsearch.as_retriever(search_kwargs={"k": 1})

# Use a filter to only retrieve documents from a specific paper
docsearch.as_retriever(
    search_kwargs={"filter": {"paper_title": "GPT-4 Technical Report"}}
)

similarity_search_by_vector_with_score

similarity_search_by_vector_with_score(
    embedding: list[float], *, k: int = 4, **kwargs: Any
) -> list[tuple[Document, float]]

Return pinecone documents most similar to embedding, along with scores.

asimilarity_search_by_vector_with_score async

asimilarity_search_by_vector_with_score(
    embedding: list[float], *, k: int = 4, **kwargs: Any
) -> list[tuple[Document, float]]

Return pinecone documents most similar to embedding, along with scores asynchronously.

get_pinecone_index classmethod

get_pinecone_index(
    index_name: str | None,
    pool_threads: int = 4,
    *,
    pinecone_api_key: str | None = None,
) -> _Index

Return a Pinecone Index instance.

PARAMETER DESCRIPTION
index_name

Name of the index to use.

TYPE: str | None

pool_threads

Number of threads to use for index upsert.

TYPE: int DEFAULT: 4

pinecone_api_key

The api_key of Pinecone.

TYPE: str | None DEFAULT: None

Returns: Pinecone Index instance.

from_existing_index classmethod

from_existing_index(
    index_name: str,
    embedding: Embeddings,
    text_key: str = "text",
    namespace: str | None = None,
    pool_threads: int = 4,
) -> PineconeVectorStore

Load pinecone vectorstore from index name.

PineconeVectorStore

Bases: VectorStore

Pinecone vector store integration.

Setup

Install langchain-pinecone and set the environment variable PINECONE_API_KEY.

pip install -qU langchain-pinecone
export PINECONE_API_KEY="your-pinecone-api-key"

Key init args — indexing params:

  • embedding (Embeddings): Embedding function to use.

Key init args — client params:

  • index (Optional[Index]): Index to use.

TODO: Replace with relevant init params.

Instantiate:

import time
import os
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from langchain_openai import OpenAIEmbeddings

pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

index_name = "langchain-test-index"  # change if desired

existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        deletion_protection="enabled",  # Defaults to "disabled"
    )
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

index = pc.Index(index_name)
vector_store = PineconeVectorStore(index=index, embedding=OpenAIEmbeddings())

Add Documents
from langchain_core.documents import Document

document_1 = Document(page_content="foo", metadata={"baz": "bar"})
document_2 = Document(page_content="thud", metadata={"bar": "baz"})
document_3 = Document(page_content="i will be deleted :(")

documents = [document_1, document_2, document_3]
ids = ["1", "2", "3"]
vector_store.add_documents(documents=documents, ids=ids)
Delete Documents
vector_store.delete(ids=["3"])
Search with filter
results = vector_store.similarity_search(query="thud", k=1, filter={"bar": "baz"})
for doc in results:
    print(f"* {doc.page_content} [{doc.metadata}]")
* thud [{'bar': 'baz'}]
Search with score
results = vector_store.similarity_search_with_score(query="qux", k=1)
for doc, score in results:
    print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")
* [SIM=0.832268] foo [{'baz': 'bar'}]
Async
# add documents
# await vector_store.aadd_documents(documents=documents, ids=ids)

# delete documents
# await vector_store.adelete(ids=["3"])

# search
# results = await vector_store.asimilarity_search(query="thud", k=1)

# search with score
results = await vector_store.asimilarity_search_with_score(query="qux", k=1)
for doc, score in results:
    print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")
* [SIM=0.832268] foo [{'baz': 'bar'}]
Use as Retriever
retriever = vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 1, "fetch_k": 2, "lambda_mult": 0.5},
)
retriever.invoke("thud")
[Document(metadata={'bar': 'baz'}, page_content='thud')]
METHOD DESCRIPTION
get_by_ids

Get documents by their IDs.

aget_by_ids

Async get documents by their IDs.

add_documents

Add or update documents in the VectorStore.

aadd_documents

Async run more documents through the embeddings and add to the VectorStore.

search

Return docs most similar to query using a specified search type.

asearch

Async return docs most similar to query using a specified search type.

similarity_search_with_relevance_scores

Return docs and relevance scores in the range [0, 1].

asimilarity_search_with_relevance_scores

Async return docs and relevance scores in the range [0, 1].

from_documents

Return VectorStore initialized from documents and embeddings.

afrom_documents

Async return VectorStore initialized from documents and embeddings.

as_retriever

Return VectorStoreRetriever initialized from this VectorStore.

add_texts

Run more texts through the embeddings and add to the vectorstore.

aadd_texts

Asynchronously run more texts through the embeddings and add to the vectorstore.

similarity_search_by_vector

Return documents most similar to the given embedding vector.

asimilarity_search_by_vector

Return documents most similar to the given embedding vector asynchronously.

similarity_search_with_score

Return pinecone documents most similar to query, along with scores.

asimilarity_search_with_score

Asynchronously return pinecone documents most similar to query, along with scores.

similarity_search_by_vector_with_score

Return pinecone documents most similar to embedding, along with scores.

asimilarity_search_by_vector_with_score

Return pinecone documents most similar to embedding, along with scores asynchronously.

similarity_search

Return pinecone documents most similar to query.

asimilarity_search

Async return docs most similar to query.

max_marginal_relevance_search_by_vector

Return docs selected using the maximal marginal relevance.

amax_marginal_relevance_search_by_vector

Return docs selected using the maximal marginal relevance asynchronously.

max_marginal_relevance_search

Return docs selected using the maximal marginal relevance.

amax_marginal_relevance_search

Async return docs selected using the maximal marginal relevance.

get_pinecone_index

Return a Pinecone Index instance.

from_texts

Construct Pinecone wrapper from raw documents.

afrom_texts

Async return VectorStore initialized from texts and embeddings.

from_existing_index

Load pinecone vectorstore from index name.

delete

Delete by vector IDs or filter.

adelete

Async delete by vector ID or other criteria.

index property

index: _Index

Get synchronous index instance.

async_index async property

async_index: _IndexAsyncio

Get asynchronous index instance.

embeddings property

embeddings: Embeddings | None

Access the query embedding object if available.

get_by_ids

get_by_ids(ids: Sequence[str]) -> list[Document]

Get documents by their IDs.

The returned documents are expected to have the ID field set to the ID of the document in the vector store.

Fewer documents may be returned than requested if some IDs are not found or if there are duplicated IDs.

Users should not assume that the order of the returned documents matches the order of the input IDs. Instead, users should rely on the ID field of the returned documents.

This method should NOT raise exceptions if no documents are found for some IDs.

PARAMETER DESCRIPTION
ids

List of IDs to retrieve.

TYPE: Sequence[str]

RETURNS DESCRIPTION
list[Document]

List of Document objects.

aget_by_ids async

aget_by_ids(ids: Sequence[str]) -> list[Document]

Async get documents by their IDs.

The returned documents are expected to have the ID field set to the ID of the document in the vector store.

Fewer documents may be returned than requested if some IDs are not found or if there are duplicated IDs.

Users should not assume that the order of the returned documents matches the order of the input IDs. Instead, users should rely on the ID field of the returned documents.

This method should NOT raise exceptions if no documents are found for some IDs.

PARAMETER DESCRIPTION
ids

List of IDs to retrieve.

TYPE: Sequence[str]

RETURNS DESCRIPTION
list[Document]

List of Document objects.

add_documents

add_documents(documents: list[Document], **kwargs: Any) -> list[str]

Add or update documents in the VectorStore.

PARAMETER DESCRIPTION
documents

Documents to add to the VectorStore.

TYPE: list[Document]

**kwargs

Additional keyword arguments.

If kwargs contains IDs and documents contain ids, the IDs in the kwargs will receive precedence.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[str]

List of IDs of the added texts.

aadd_documents async

aadd_documents(documents: list[Document], **kwargs: Any) -> list[str]

Async run more documents through the embeddings and add to the VectorStore.

PARAMETER DESCRIPTION
documents

Documents to add to the VectorStore.

TYPE: list[Document]

**kwargs

Additional keyword arguments.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[str]

List of IDs of the added texts.

search

search(query: str, search_type: str, **kwargs: Any) -> list[Document]

Return docs most similar to query using a specified search type.

PARAMETER DESCRIPTION
query

Input text.

TYPE: str

search_type

Type of search to perform.

Can be 'similarity', 'mmr', or 'similarity_score_threshold'.

TYPE: str

**kwargs

Arguments to pass to the search method.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[Document]

List of Document objects most similar to the query.

RAISES DESCRIPTION
ValueError

If search_type is not one of 'similarity', 'mmr', or 'similarity_score_threshold'.

asearch async

asearch(query: str, search_type: str, **kwargs: Any) -> list[Document]

Async return docs most similar to query using a specified search type.

PARAMETER DESCRIPTION
query

Input text.

TYPE: str

search_type

Type of search to perform.

Can be 'similarity', 'mmr', or 'similarity_score_threshold'.

TYPE: str

**kwargs

Arguments to pass to the search method.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[Document]

List of Document objects most similar to the query.

RAISES DESCRIPTION
ValueError

If search_type is not one of 'similarity', 'mmr', or 'similarity_score_threshold'.

similarity_search_with_relevance_scores

similarity_search_with_relevance_scores(
    query: str, k: int = 4, **kwargs: Any
) -> list[tuple[Document, float]]

Return docs and relevance scores in the range [0, 1].

0 is dissimilar, 1 is most similar.

PARAMETER DESCRIPTION
query

Input text.

TYPE: str

k

Number of Document objects to return.

TYPE: int DEFAULT: 4

**kwargs

Kwargs to be passed to similarity search.

Should include score_threshold, an optional floating point value between 0 and 1 to filter the resulting set of retrieved docs.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[tuple[Document, float]]

List of tuples of (doc, similarity_score).

asimilarity_search_with_relevance_scores async

asimilarity_search_with_relevance_scores(
    query: str, k: int = 4, **kwargs: Any
) -> list[tuple[Document, float]]

Async return docs and relevance scores in the range [0, 1].

0 is dissimilar, 1 is most similar.

PARAMETER DESCRIPTION
query

Input text.

TYPE: str

k

Number of Document objects to return.

TYPE: int DEFAULT: 4

**kwargs

Kwargs to be passed to similarity search.

Should include score_threshold, an optional floating point value between 0 and 1 to filter the resulting set of retrieved docs.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[tuple[Document, float]]

List of tuples of (doc, similarity_score).

from_documents classmethod

from_documents(documents: list[Document], embedding: Embeddings, **kwargs: Any) -> Self

Return VectorStore initialized from documents and embeddings.

PARAMETER DESCRIPTION
documents

List of Document objects to add to the VectorStore.

TYPE: list[Document]

embedding

Embedding function to use.

TYPE: Embeddings

**kwargs

Additional keyword arguments.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
Self

VectorStore initialized from documents and embeddings.

afrom_documents async classmethod

afrom_documents(
    documents: list[Document], embedding: Embeddings, **kwargs: Any
) -> Self

Async return VectorStore initialized from documents and embeddings.

PARAMETER DESCRIPTION
documents

List of Document objects to add to the VectorStore.

TYPE: list[Document]

embedding

Embedding function to use.

TYPE: Embeddings

**kwargs

Additional keyword arguments.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
Self

VectorStore initialized from documents and embeddings.

as_retriever

as_retriever(**kwargs: Any) -> VectorStoreRetriever

Return VectorStoreRetriever initialized from this VectorStore.

PARAMETER DESCRIPTION
**kwargs

Keyword arguments to pass to the search function.

Can include:

  • search_type: Defines the type of search that the Retriever should perform. Can be 'similarity' (default), 'mmr', or 'similarity_score_threshold'.
  • search_kwargs: Keyword arguments to pass to the search function.

    Can include things like:

    • k: Amount of documents to return (Default: 4)
    • score_threshold: Minimum relevance threshold for similarity_score_threshold
    • fetch_k: Amount of documents to pass to MMR algorithm (Default: 20)
    • lambda_mult: Diversity of results returned by MMR; 1 for minimum diversity and 0 for maximum. (Default: 0.5)
    • filter: Filter by document metadata

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
VectorStoreRetriever

Retriever class for VectorStore.

Examples:

# Retrieve more documents with higher diversity
# Useful if your dataset has many similar documents
docsearch.as_retriever(
    search_type="mmr", search_kwargs={"k": 6, "lambda_mult": 0.25}
)

# Fetch more documents for the MMR algorithm to consider
# But only return the top 5
docsearch.as_retriever(search_type="mmr", search_kwargs={"k": 5, "fetch_k": 50})

# Only retrieve documents that have a relevance score
# Above a certain threshold
docsearch.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.8},
)

# Only get the single most similar document from the dataset
docsearch.as_retriever(search_kwargs={"k": 1})

# Use a filter to only retrieve documents from a specific paper
docsearch.as_retriever(
    search_kwargs={"filter": {"paper_title": "GPT-4 Technical Report"}}
)

add_texts

add_texts(
    texts: Iterable[str],
    metadatas: list[dict] | None = None,
    ids: list[str] | None = None,
    namespace: str | None = None,
    batch_size: int = 32,
    embedding_chunk_size: int = 1000,
    *,
    async_req: bool = True,
    id_prefix: str | None = None,
    **kwargs: Any,
) -> list[str]

Run more texts through the embeddings and add to the vectorstore.

Upsert optimization is done by chunking the embeddings and upserting them. This avoids memory issues and optimizes the use of HTTP-based embeddings. For OpenAI embeddings, use pool_threads>4 when constructing the pinecone.Index, embedding_chunk_size>1000 and batch_size~64 for best performance.

Args:

  • texts: Iterable of strings to add to the vectorstore.
  • metadatas: Optional list of metadatas associated with the texts.
  • ids: Optional list of ids to associate with the texts.
  • namespace: Optional pinecone namespace to add the texts to.
  • batch_size: Batch size to use when adding the texts to the vectorstore.
  • embedding_chunk_size: Chunk size to use when embedding the texts.
  • async_req: Whether to run asynchronously. Defaults to True.
  • id_prefix: Optional string to use as an ID prefix when upserting vectors.

RETURNS DESCRIPTION
list[str]

List of ids from adding the texts into the vectorstore.

aadd_texts async

aadd_texts(
    texts: Iterable[str],
    metadatas: list[dict] | None = None,
    ids: list[str] | None = None,
    namespace: str | None = None,
    batch_size: int = 32,
    embedding_chunk_size: int = 1000,
    *,
    id_prefix: str | None = None,
    **kwargs: Any,
) -> list[str]

Asynchronously run more texts through the embeddings and add to the vectorstore.

Upsert optimization is done by chunking the embeddings and upserting them. This avoids memory issues and optimizes the use of HTTP-based embeddings. For OpenAI embeddings, use pool_threads>4 when constructing the pinecone.Index, embedding_chunk_size>1000 and batch_size~64 for best performance.

Args:

  • texts: Iterable of strings to add to the vectorstore.
  • metadatas: Optional list of metadatas associated with the texts.
  • ids: Optional list of ids to associate with the texts.
  • namespace: Optional pinecone namespace to add the texts to.
  • batch_size: Batch size to use when adding the texts to the vectorstore.
  • embedding_chunk_size: Chunk size to use when embedding the texts.
  • id_prefix: Optional string to use as an ID prefix when upserting vectors.

RETURNS DESCRIPTION
list[str]

List of ids from adding the texts into the vectorstore.

similarity_search_by_vector

similarity_search_by_vector(
    embedding: list[float], k: int = 4, **kwargs: Any
) -> list[Document]

Return documents most similar to the given embedding vector.

Wraps similarity_search_by_vector_with_score but strips the scores.

asimilarity_search_by_vector async

asimilarity_search_by_vector(
    embedding: list[float], k: int = 4, **kwargs: Any
) -> list[Document]

Return documents most similar to the given embedding vector asynchronously.

Wraps asimilarity_search_by_vector_with_score but strips the scores.

similarity_search_with_score

similarity_search_with_score(
    query: str,
    k: int = 4,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[tuple[Document, float]]

Return pinecone documents most similar to query, along with scores.

PARAMETER DESCRIPTION
query

Text to look up documents similar to.

TYPE: str

k

Number of Documents to return. Defaults to 4.

TYPE: int DEFAULT: 4

filter

Dictionary of argument(s) to filter on metadata

TYPE: dict | None DEFAULT: None

namespace

Namespace to search in. Default will search in '' namespace.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
list[tuple[Document, float]]

List of Documents most similar to the query and score for each

asimilarity_search_with_score async

asimilarity_search_with_score(
    query: str,
    k: int = 4,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[tuple[Document, float]]

Asynchronously return pinecone documents most similar to query, along with scores.

PARAMETER DESCRIPTION
query

Text to look up documents similar to.

TYPE: str

k

Number of Documents to return. Defaults to 4.

TYPE: int DEFAULT: 4

filter

Dictionary of argument(s) to filter on metadata

TYPE: dict | None DEFAULT: None

namespace

Namespace to search in. Default will search in '' namespace.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
list[tuple[Document, float]]

List of Documents most similar to the query and score for each

similarity_search_by_vector_with_score

similarity_search_by_vector_with_score(
    embedding: list[float], *, k: int = 4, **kwargs: Any
) -> list[tuple[Document, float]]

Return pinecone documents most similar to embedding, along with scores.

asimilarity_search_by_vector_with_score async

asimilarity_search_by_vector_with_score(
    embedding: list[float], *, k: int = 4, **kwargs: Any
) -> list[tuple[Document, float]]

Return pinecone documents most similar to embedding, along with scores asynchronously.

similarity_search

similarity_search(
    query: str,
    k: int = 4,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[Document]

Return pinecone documents most similar to query.

PARAMETER DESCRIPTION
query

Text to look up documents similar to.

TYPE: str

k

Number of Documents to return. Defaults to 4.

TYPE: int DEFAULT: 4

filter

Dictionary of argument(s) to filter on metadata

TYPE: dict | None DEFAULT: None

namespace

Namespace to search in. Default will search in '' namespace.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
list[Document]

List of Documents most similar to the query.

asimilarity_search async

asimilarity_search(
    query: str,
    k: int = 4,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[Document]

Async return docs most similar to query.

PARAMETER DESCRIPTION
query

Input text.

TYPE: str

k

Number of Document objects to return.

TYPE: int DEFAULT: 4

**kwargs

Arguments to pass to the search method.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[Document]

List of Document objects most similar to the query.

max_marginal_relevance_search_by_vector

max_marginal_relevance_search_by_vector(
    embedding: list[float],
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[Document]

Return docs selected using the maximal marginal relevance.

Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents.

PARAMETER DESCRIPTION
embedding

Embedding to look up documents similar to.

TYPE: list[float]

k

Number of Documents to return. Defaults to 4.

TYPE: int DEFAULT: 4

fetch_k

Number of Documents to fetch to pass to MMR algorithm.

TYPE: int DEFAULT: 20

lambda_mult

Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.

TYPE: float DEFAULT: 0.5

filter

Dictionary of argument(s) to filter on metadata

TYPE: dict | None DEFAULT: None

namespace

Namespace to search in. Default will search in '' namespace.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
list[Document]

List of Documents selected by maximal marginal relevance.

amax_marginal_relevance_search_by_vector async

amax_marginal_relevance_search_by_vector(
    embedding: list[float],
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[Document]

Return docs selected using the maximal marginal relevance asynchronously.

Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents.

PARAMETER DESCRIPTION
embedding

Embedding to look up documents similar to.

TYPE: list[float]

k

Number of Documents to return. Defaults to 4.

TYPE: int DEFAULT: 4

fetch_k

Number of Documents to fetch to pass to MMR algorithm.

TYPE: int DEFAULT: 20

lambda_mult

Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.

TYPE: float DEFAULT: 0.5

filter

Dictionary of argument(s) to filter on metadata

TYPE: dict | None DEFAULT: None

namespace

Namespace to search in. Default will search in '' namespace.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
list[Document]

List of Documents selected by maximal marginal relevance.

max_marginal_relevance_search

max_marginal_relevance_search(
    query: str,
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[Document]

Return docs selected using the maximal marginal relevance.

Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents.

PARAMETER DESCRIPTION
query

Text to look up documents similar to.

TYPE: str

k

Number of Documents to return. Defaults to 4.

TYPE: int DEFAULT: 4

fetch_k

Number of Documents to fetch to pass to MMR algorithm.

TYPE: int DEFAULT: 20

lambda_mult

Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.

TYPE: float DEFAULT: 0.5

filter

Dictionary of argument(s) to filter on metadata

TYPE: dict | None DEFAULT: None

namespace

Namespace to search in. Default will search in '' namespace.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
list[Document]

List of Documents selected by maximal marginal relevance.

amax_marginal_relevance_search async

amax_marginal_relevance_search(
    query: str,
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[Document]

Async return docs selected using the maximal marginal relevance.

Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents.

PARAMETER DESCRIPTION
query

Text to look up documents similar to.

TYPE: str

k

Number of Document objects to return.

TYPE: int DEFAULT: 4

fetch_k

Number of Document objects to fetch to pass to MMR algorithm.

TYPE: int DEFAULT: 20

lambda_mult

Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity.

TYPE: float DEFAULT: 0.5

**kwargs

Arguments to pass to the search method.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[Document]

List of Document objects selected by maximal marginal relevance.

get_pinecone_index classmethod

get_pinecone_index(
    index_name: str | None,
    pool_threads: int = 4,
    *,
    pinecone_api_key: str | None = None,
) -> _Index

Return a Pinecone Index instance.

PARAMETER DESCRIPTION
index_name

Name of the index to use.

TYPE: str | None

pool_threads

Number of threads to use for index upsert.

TYPE: int DEFAULT: 4

pinecone_api_key

The api_key of Pinecone.

TYPE: str | None DEFAULT: None

Returns: Pinecone Index instance.

from_texts classmethod

from_texts(
    texts: list[str],
    embedding: Embeddings,
    metadatas: list[dict] | None = None,
    ids: list[str] | None = None,
    batch_size: int = 32,
    text_key: str = "text",
    namespace: str | None = None,
    index_name: str | None = None,
    upsert_kwargs: dict | None = None,
    pool_threads: int = 4,
    embeddings_chunk_size: int = 1000,
    async_req: bool = True,
    *,
    id_prefix: str | None = None,
    **kwargs: Any,
) -> PineconeVectorStore

Construct Pinecone wrapper from raw documents.

This is a user-friendly interface that:
  1. Embeds documents.
  2. Adds the documents to a provided Pinecone index.

This is intended to be a quick way to get started.

The pool_threads affects the speed of the upsert operations.

Setup: set the PINECONE_API_KEY environment variable to your Pinecone API key.

Example
from langchain_pinecone import PineconeVectorStore, PineconeEmbeddings

embeddings = PineconeEmbeddings(model="multilingual-e5-large")

index_name = "my-index"
vectorstore = PineconeVectorStore.from_texts(
    texts,
    index_name=index_name,
    embedding=embeddings,
    namespace=namespace,
)

afrom_texts async classmethod

afrom_texts(
    texts: list[str],
    embedding: Embeddings,
    metadatas: list[dict] | None = None,
    ids: list[str] | None = None,
    batch_size: int = 32,
    text_key: str = "text",
    namespace: str | None = None,
    index_name: str | None = None,
    upsert_kwargs: dict | None = None,
    embeddings_chunk_size: int = 1000,
    *,
    id_prefix: str | None = None,
    **kwargs: Any,
) -> PineconeVectorStore

Async return VectorStore initialized from texts and embeddings.

PARAMETER DESCRIPTION
texts

Texts to add to the VectorStore.

TYPE: list[str]

embedding

Embedding function to use.

TYPE: Embeddings

metadatas

Optional list of metadatas associated with the texts.

TYPE: list[dict] | None DEFAULT: None

ids

Optional list of IDs associated with the texts.

TYPE: list[str] | None DEFAULT: None

**kwargs

Additional keyword arguments.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
Self

VectorStore initialized from texts and embeddings.

from_existing_index classmethod

from_existing_index(
    index_name: str,
    embedding: Embeddings,
    text_key: str = "text",
    namespace: str | None = None,
    pool_threads: int = 4,
) -> PineconeVectorStore

Load pinecone vectorstore from index name.

delete

delete(
    ids: list[str] | None = None,
    delete_all: bool | None = None,
    namespace: str | None = None,
    filter: dict | None = None,
    **kwargs: Any,
) -> None

Delete by vector IDs or filter.

Args:

  • ids: List of ids to delete.
  • delete_all: Whether to delete all vectors in the index.
  • filter: Dictionary of conditions to filter vectors to delete.
  • namespace: Namespace to delete from. Default is the '' namespace.

adelete async

adelete(
    ids: list[str] | None = None,
    delete_all: bool | None = None,
    namespace: str | None = None,
    filter: dict | None = None,
    **kwargs: Any,
) -> None

Async delete by vector ID or other criteria.

PARAMETER DESCRIPTION
ids

List of IDs to delete. If None, delete all.

TYPE: list[str] | None DEFAULT: None

**kwargs

Other keyword arguments that subclasses might use.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
None

This method does not return a value.

PineconeSparseVectorStore

Bases: PineconeVectorStore

Pinecone sparse vector store integration.

This class extends PineconeVectorStore to support sparse vector representations. It requires a Pinecone sparse index and PineconeSparseEmbeddings.

Setup
# Install required packages
pip install langchain-pinecone pinecone-client
Key init args - indexing params

  • text_key (str): The metadata key where the document text will be stored.
  • namespace (str): Pinecone namespace to use.
  • distance_strategy (DistanceStrategy): Strategy for computing distances.

Key init args - client params

  • index (pinecone.Index): A Pinecone sparse index.
  • embedding (PineconeSparseEmbeddings): A sparse embeddings model.
  • pinecone_api_key (str): The Pinecone API key.
  • index_name (str): The name of the Pinecone index.

See full list of supported init args and their descriptions in the params section.

Instantiate
from pinecone import Pinecone
from langchain_pinecone import PineconeSparseVectorStore
from langchain_pinecone.embeddings import PineconeSparseEmbeddings

# Initialize Pinecone client
pc = Pinecone(api_key="your-api-key")

# Get your sparse index
index = pc.Index("your-sparse-index-name")

# Initialize embedding function
embeddings = PineconeSparseEmbeddings()

# Create vector store
vectorstore = PineconeSparseVectorStore(
    index=index,
    embedding=embeddings,
    text_key="content",
    namespace="my-namespace"
)
Add Documents
from langchain_core.documents import Document

docs = [
    Document(page_content="This is a sparse vector example"),
    Document(page_content="Another document for testing")
]

# Option 1: Add from Document objects
vectorstore.add_documents(docs)

# Option 2: Add from texts
texts = ["Text 1", "Text 2"]
metadatas = [{"source": "source1"}, {"source": "source2"}]
vectorstore.add_texts(texts, metadatas=metadatas)
Update Documents

Update documents by re-adding them with the same IDs.

ids = ["id1", "id2"]
texts = ["Updated text 1", "Updated text 2"]
metadatas = [{"source": "updated_source1"}, {"source": "updated_source2"}]

vectorstore.add_texts(texts, metadatas=metadatas, ids=ids)

Delete Documents
# Delete by IDs
vectorstore.delete(ids=["id1", "id2"])

# Delete by filter
vectorstore.delete(filter={"source": "source1"})

# Delete all documents in a namespace
vectorstore.delete(delete_all=True, namespace="my-namespace")
Search with score
# Search with relevance scores
docs_and_scores = vectorstore.similarity_search_with_score(
    "query text",
    k=5
)

for doc, score in docs_and_scores:
    print(f"Score: {score}, Document: {doc.page_content}")
Use as Retriever
# Create a retriever
retriever = vectorstore.as_retriever()

# Customize retriever
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5, "fetch_k": 20, "lambda_mult": 0.5},
    filter={"source": "source1"}
)

# Use the retriever
docs = retriever.get_relevant_documents("query text")
METHOD DESCRIPTION
get_by_ids

Get documents by their IDs.

aget_by_ids

Async get documents by their IDs.

add_documents

Add or update documents in the VectorStore.

aadd_documents

Async run more documents through the embeddings and add to the VectorStore.

search

Return docs most similar to query using a specified search type.

asearch

Async return docs most similar to query using a specified search type.

similarity_search_with_relevance_scores

Return docs and relevance scores in the range [0, 1].

asimilarity_search_with_relevance_scores

Async return docs and relevance scores in the range [0, 1].

similarity_search_by_vector

Return documents most similar to the given embedding vector.

asimilarity_search_by_vector

Return documents most similar to the given embedding vector asynchronously.

from_documents

Return VectorStore initialized from documents and embeddings.

afrom_documents

Async return VectorStore initialized from documents and embeddings.

from_texts

Construct Pinecone wrapper from raw documents.

afrom_texts

Async return VectorStore initialized from texts and embeddings.

as_retriever

Return VectorStoreRetriever initialized from this VectorStore.

get_pinecone_index

Return a Pinecone Index instance.

from_existing_index

Load pinecone vectorstore from index name.

add_texts

Run more texts through the embeddings and add to the vectorstore.

aadd_texts

Asynchronously run more texts through the embeddings and add to the vectorstore.

similarity_search_with_score

Return pinecone documents most similar to query, along with scores.

asimilarity_search_with_score

Asynchronously return pinecone documents most similar to query, along with scores.

similarity_search_by_vector_with_score

Return pinecone documents most similar to embedding, along with scores.

asimilarity_search_by_vector_with_score

Return pinecone documents most similar to embedding, along with scores asynchronously.

similarity_search

Return pinecone documents most similar to query.

asimilarity_search

Async return docs most similar to query.

max_marginal_relevance_search_by_vector

Return docs selected using the maximal marginal relevance.

amax_marginal_relevance_search_by_vector

Return docs selected using the maximal marginal relevance asynchronously.

max_marginal_relevance_search

Return docs selected using the maximal marginal relevance.

amax_marginal_relevance_search

Async return docs selected using the maximal marginal relevance.

delete

Delete by vector IDs or filter.

adelete

Async delete by vector ID or other criteria.

index property

index: _Index

Get synchronous index instance.

async_index async property

async_index: _IndexAsyncio

Get asynchronous index instance.

embeddings property

Access the query embedding object if available.

get_by_ids

get_by_ids(ids: Sequence[str]) -> list[Document]

Get documents by their IDs.

The returned documents are expected to have the ID field set to the ID of the document in the vector store.

Fewer documents may be returned than requested if some IDs are not found or if there are duplicated IDs.

Users should not assume that the order of the returned documents matches the order of the input IDs. Instead, users should rely on the ID field of the returned documents.

This method should NOT raise exceptions if no documents are found for some IDs.

PARAMETER DESCRIPTION
ids

List of IDs to retrieve.

TYPE: Sequence[str]

RETURNS DESCRIPTION
list[Document]

List of Document objects.

aget_by_ids async

aget_by_ids(ids: Sequence[str]) -> list[Document]

Async get documents by their IDs.

The returned documents are expected to have the ID field set to the ID of the document in the vector store.

Fewer documents may be returned than requested if some IDs are not found or if there are duplicated IDs.

Users should not assume that the order of the returned documents matches the order of the input IDs. Instead, users should rely on the ID field of the returned documents.

This method should NOT raise exceptions if no documents are found for some IDs.

PARAMETER DESCRIPTION
ids

List of IDs to retrieve.

TYPE: Sequence[str]

RETURNS DESCRIPTION
list[Document]

List of Document objects.

add_documents

add_documents(documents: list[Document], **kwargs: Any) -> list[str]

Add or update documents in the VectorStore.

PARAMETER DESCRIPTION
documents

Documents to add to the VectorStore.

TYPE: list[Document]

**kwargs

Additional keyword arguments.

If kwargs contains IDs and documents contain ids, the IDs in the kwargs will receive precedence.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[str]

List of IDs of the added texts.

aadd_documents async

aadd_documents(documents: list[Document], **kwargs: Any) -> list[str]

Async run more documents through the embeddings and add to the VectorStore.

PARAMETER DESCRIPTION
documents

Documents to add to the VectorStore.

TYPE: list[Document]

**kwargs

Additional keyword arguments.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[str]

List of IDs of the added texts.

search

search(query: str, search_type: str, **kwargs: Any) -> list[Document]

Return docs most similar to query using a specified search type.

PARAMETER DESCRIPTION
query

Input text.

TYPE: str

search_type

Type of search to perform.

Can be 'similarity', 'mmr', or 'similarity_score_threshold'.

TYPE: str

**kwargs

Arguments to pass to the search method.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[Document]

List of Document objects most similar to the query.

RAISES DESCRIPTION
ValueError

If search_type is not one of 'similarity', 'mmr', or 'similarity_score_threshold'.

asearch async

asearch(query: str, search_type: str, **kwargs: Any) -> list[Document]

Async return docs most similar to query using a specified search type.

PARAMETER DESCRIPTION
query

Input text.

TYPE: str

search_type

Type of search to perform.

Can be 'similarity', 'mmr', or 'similarity_score_threshold'.

TYPE: str

**kwargs

Arguments to pass to the search method.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[Document]

List of Document objects most similar to the query.

RAISES DESCRIPTION
ValueError

If search_type is not one of 'similarity', 'mmr', or 'similarity_score_threshold'.

similarity_search_with_relevance_scores

similarity_search_with_relevance_scores(
    query: str, k: int = 4, **kwargs: Any
) -> list[tuple[Document, float]]

Return docs and relevance scores in the range [0, 1].

0 is dissimilar, 1 is most similar.

PARAMETER DESCRIPTION
query

Input text.

TYPE: str

k

Number of Document objects to return.

TYPE: int DEFAULT: 4

**kwargs

Kwargs to be passed to similarity search.

May include score_threshold, an optional floating-point value between 0 and 1 used to filter the resulting set of retrieved docs.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[tuple[Document, float]]

List of tuples of (doc, similarity_score).

asimilarity_search_with_relevance_scores async

asimilarity_search_with_relevance_scores(
    query: str, k: int = 4, **kwargs: Any
) -> list[tuple[Document, float]]

Async return docs and relevance scores in the range [0, 1].

0 is dissimilar, 1 is most similar.

PARAMETER DESCRIPTION
query

Input text.

TYPE: str

k

Number of Document objects to return.

TYPE: int DEFAULT: 4

**kwargs

Kwargs to be passed to similarity search.

May include score_threshold, an optional floating-point value between 0 and 1 used to filter the resulting set of retrieved docs.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[tuple[Document, float]]

List of tuples of (doc, similarity_score).

similarity_search_by_vector

similarity_search_by_vector(
    embedding: list[float], k: int = 4, **kwargs: Any
) -> list[Document]

Return documents most similar to the given embedding vector.

Wraps similarity_search_by_vector_with_score but strips the scores.

asimilarity_search_by_vector async

asimilarity_search_by_vector(
    embedding: list[float], k: int = 4, **kwargs: Any
) -> list[Document]

Return documents most similar to the given embedding vector asynchronously.

Wraps asimilarity_search_by_vector_with_score but strips the scores.

from_documents classmethod

from_documents(documents: list[Document], embedding: Embeddings, **kwargs: Any) -> Self

Return VectorStore initialized from documents and embeddings.

PARAMETER DESCRIPTION
documents

List of Document objects to add to the VectorStore.

TYPE: list[Document]

embedding

Embedding function to use.

TYPE: Embeddings

**kwargs

Additional keyword arguments.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
Self

VectorStore initialized from documents and embeddings.

afrom_documents async classmethod

afrom_documents(
    documents: list[Document], embedding: Embeddings, **kwargs: Any
) -> Self

Async return VectorStore initialized from documents and embeddings.

PARAMETER DESCRIPTION
documents

List of Document objects to add to the VectorStore.

TYPE: list[Document]

embedding

Embedding function to use.

TYPE: Embeddings

**kwargs

Additional keyword arguments.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
Self

VectorStore initialized from documents and embeddings.

from_texts classmethod

from_texts(
    texts: list[str],
    embedding: Embeddings,
    metadatas: list[dict] | None = None,
    ids: list[str] | None = None,
    batch_size: int = 32,
    text_key: str = "text",
    namespace: str | None = None,
    index_name: str | None = None,
    upsert_kwargs: dict | None = None,
    pool_threads: int = 4,
    embeddings_chunk_size: int = 1000,
    async_req: bool = True,
    *,
    id_prefix: str | None = None,
    **kwargs: Any,
) -> PineconeVectorStore

Construct Pinecone wrapper from raw documents.

This is a user-friendly interface that:
  1. Embeds documents.
  2. Adds the documents to a provided Pinecone index.

This is intended to be a quick way to get started.

The pool_threads affects the speed of the upsert operations.

Setup: set the PINECONE_API_KEY environment variable to your Pinecone API key.

Example
from langchain_pinecone import PineconeVectorStore, PineconeEmbeddings

embeddings = PineconeEmbeddings(model="multilingual-e5-large")

index_name = "my-index"
vectorstore = PineconeVectorStore.from_texts(
    texts,
    index_name=index_name,
    embedding=embedding,
    namespace=namespace,
)

afrom_texts async classmethod

afrom_texts(
    texts: list[str],
    embedding: Embeddings,
    metadatas: list[dict] | None = None,
    ids: list[str] | None = None,
    batch_size: int = 32,
    text_key: str = "text",
    namespace: str | None = None,
    index_name: str | None = None,
    upsert_kwargs: dict | None = None,
    embeddings_chunk_size: int = 1000,
    *,
    id_prefix: str | None = None,
    **kwargs: Any,
) -> PineconeVectorStore

Async return VectorStore initialized from texts and embeddings.

PARAMETER DESCRIPTION
texts

Texts to add to the VectorStore.

TYPE: list[str]

embedding

Embedding function to use.

TYPE: Embeddings

metadatas

Optional list of metadatas associated with the texts.

TYPE: list[dict] | None DEFAULT: None

ids

Optional list of IDs associated with the texts.

TYPE: list[str] | None DEFAULT: None

**kwargs

Additional keyword arguments.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
Self

VectorStore initialized from texts and embeddings.

as_retriever

as_retriever(**kwargs: Any) -> VectorStoreRetriever

Return VectorStoreRetriever initialized from this VectorStore.

PARAMETER DESCRIPTION
**kwargs

Keyword arguments to pass to the search function.

Can include:

  • search_type: Defines the type of search that the Retriever should perform. Can be 'similarity' (default), 'mmr', or 'similarity_score_threshold'.
  • search_kwargs: Keyword arguments to pass to the search function.

    Can include things like:

    • k: Amount of documents to return (Default: 4)
    • score_threshold: Minimum relevance threshold for similarity_score_threshold
    • fetch_k: Amount of documents to pass to MMR algorithm (Default: 20)
    • lambda_mult: Diversity of results returned by MMR; 1 for minimum diversity and 0 for maximum. (Default: 0.5)
    • filter: Filter by document metadata

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
VectorStoreRetriever

Retriever class for VectorStore.

Examples:

# Retrieve more documents with higher diversity
# Useful if your dataset has many similar documents
docsearch.as_retriever(
    search_type="mmr", search_kwargs={"k": 6, "lambda_mult": 0.25}
)

# Fetch more documents for the MMR algorithm to consider
# But only return the top 5
docsearch.as_retriever(search_type="mmr", search_kwargs={"k": 5, "fetch_k": 50})

# Only retrieve documents that have a relevance score
# Above a certain threshold
docsearch.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.8},
)

# Only get the single most similar document from the dataset
docsearch.as_retriever(search_kwargs={"k": 1})

# Use a filter to only retrieve documents from a specific paper
docsearch.as_retriever(
    search_kwargs={"filter": {"paper_title": "GPT-4 Technical Report"}}
)

get_pinecone_index classmethod

get_pinecone_index(
    index_name: str | None,
    pool_threads: int = 4,
    *,
    pinecone_api_key: str | None = None,
) -> _Index

Return a Pinecone Index instance.

PARAMETER DESCRIPTION
index_name

Name of the index to use.

TYPE: str | None

pool_threads

Number of threads to use for index upsert.

TYPE: int DEFAULT: 4

pinecone_api_key

The api_key of Pinecone.

TYPE: str | None DEFAULT: None

Returns: Pinecone Index instance.

from_existing_index classmethod

from_existing_index(
    index_name: str,
    embedding: Embeddings,
    text_key: str = "text",
    namespace: str | None = None,
    pool_threads: int = 4,
) -> PineconeVectorStore

Load pinecone vectorstore from index name.

add_texts

add_texts(
    texts: Iterable[str],
    metadatas: list[dict] | None = None,
    ids: list[str] | None = None,
    namespace: str | None = None,
    batch_size: int = 32,
    embedding_chunk_size: int = 1000,
    *,
    async_req: bool = True,
    id_prefix: str | None = None,
    **kwargs: Any,
) -> list[str]

Run more texts through the embeddings and add to the vectorstore.

Upsert optimization is done by chunking the embeddings and upserting them. This is done to avoid memory issues and to optimize the use of HTTP-based embeddings. For OpenAI embeddings, use pool_threads>4 when constructing the pinecone.Index, embedding_chunk_size>1000 and batch_size~64 for best performance.

Args:
  • texts: Iterable of strings to add to the vectorstore.
  • metadatas: Optional list of metadatas associated with the texts.
  • ids: Optional list of ids to associate with the texts.
  • namespace: Optional pinecone namespace to add the texts to.
  • batch_size: Batch size to use when adding the texts to the vectorstore.
  • embedding_chunk_size: Chunk size to use when embedding the texts.
  • async_req: Whether to run asynchronously. Defaults to True.
  • id_prefix: Optional string to use as an ID prefix when upserting vectors.

RETURNS DESCRIPTION
list[str]

List of ids from adding the texts into the vectorstore.

aadd_texts async

aadd_texts(
    texts: Iterable[str],
    metadatas: list[dict] | None = None,
    ids: list[str] | None = None,
    namespace: str | None = None,
    batch_size: int = 32,
    embedding_chunk_size: int = 1000,
    *,
    id_prefix: str | None = None,
    **kwargs: Any,
) -> list[str]

Asynchronously run more texts through the embeddings and add to the vectorstore.

Upsert optimization is done by chunking the embeddings and upserting them. This is done to avoid memory issues and to optimize the use of HTTP-based embeddings. For OpenAI embeddings, use pool_threads>4 when constructing the pinecone.Index, embedding_chunk_size>1000 and batch_size~64 for best performance.

Args:
  • texts: Iterable of strings to add to the vectorstore.
  • metadatas: Optional list of metadatas associated with the texts.
  • ids: Optional list of ids to associate with the texts.
  • namespace: Optional pinecone namespace to add the texts to.
  • batch_size: Batch size to use when adding the texts to the vectorstore.
  • embedding_chunk_size: Chunk size to use when embedding the texts.
  • id_prefix: Optional string to use as an ID prefix when upserting vectors.

RETURNS DESCRIPTION
list[str]

List of ids from adding the texts into the vectorstore.

similarity_search_with_score

similarity_search_with_score(
    query: str,
    k: int = 4,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[tuple[Document, float]]

Return pinecone documents most similar to query, along with scores.

PARAMETER DESCRIPTION
query

Text to look up documents similar to.

TYPE: str

k

Number of Documents to return. Defaults to 4.

TYPE: int DEFAULT: 4

filter

Dictionary of argument(s) to filter on metadata

TYPE: dict | None DEFAULT: None

namespace

Namespace to search in. Default will search in '' namespace.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
list[tuple[Document, float]]

List of Documents most similar to the query and score for each

asimilarity_search_with_score async

asimilarity_search_with_score(
    query: str,
    k: int = 4,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[tuple[Document, float]]

Asynchronously return pinecone documents most similar to query, along with scores.

PARAMETER DESCRIPTION
query

Text to look up documents similar to.

TYPE: str

k

Number of Documents to return. Defaults to 4.

TYPE: int DEFAULT: 4

filter

Dictionary of argument(s) to filter on metadata

TYPE: dict | None DEFAULT: None

namespace

Namespace to search in. Default will search in '' namespace.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
list[tuple[Document, float]]

List of Documents most similar to the query and score for each

similarity_search_by_vector_with_score

similarity_search_by_vector_with_score(
    embedding: SparseValues,
    *,
    k: int = 4,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[tuple[Document, float]]

Return pinecone documents most similar to embedding, along with scores.

asimilarity_search_by_vector_with_score async

asimilarity_search_by_vector_with_score(
    embedding: SparseValues,
    *,
    k: int = 4,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[tuple[Document, float]]

Return pinecone documents most similar to embedding, along with scores asynchronously.

similarity_search

similarity_search(
    query: str,
    k: int = 4,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[Document]

Return pinecone documents most similar to query.

PARAMETER DESCRIPTION
query

Text to look up documents similar to.

TYPE: str

k

Number of Documents to return. Defaults to 4.

TYPE: int DEFAULT: 4

filter

Dictionary of argument(s) to filter on metadata

TYPE: dict | None DEFAULT: None

namespace

Namespace to search in. Default will search in '' namespace.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
list[Document]

List of Documents most similar to the query.

asimilarity_search async

asimilarity_search(
    query: str,
    k: int = 4,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[Document]

Async return docs most similar to query.

PARAMETER DESCRIPTION
query

Input text.

TYPE: str

k

Number of Document objects to return.

TYPE: int DEFAULT: 4

**kwargs

Arguments to pass to the search method.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[Document]

List of Document objects most similar to the query.

max_marginal_relevance_search_by_vector

max_marginal_relevance_search_by_vector(
    embedding: SparseValues,
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[Document]

Return docs selected using the maximal marginal relevance.

Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents.

PARAMETER DESCRIPTION
embedding

Embedding to look up documents similar to.

TYPE: SparseValues

k

Number of Documents to return. Defaults to 4.

TYPE: int DEFAULT: 4

fetch_k

Number of Documents to fetch to pass to MMR algorithm.

TYPE: int DEFAULT: 20

lambda_mult

Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.

TYPE: float DEFAULT: 0.5

filter

Dictionary of argument(s) to filter on metadata

TYPE: dict | None DEFAULT: None

namespace

Namespace to search in. Default will search in '' namespace.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
list[Document]

List of Documents selected by maximal marginal relevance.

amax_marginal_relevance_search_by_vector async

amax_marginal_relevance_search_by_vector(
    embedding: SparseValues,
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[Document]

Return docs selected using the maximal marginal relevance asynchronously.

Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents.

PARAMETER DESCRIPTION
embedding

Embedding to look up documents similar to.

TYPE: SparseValues

k

Number of Documents to return. Defaults to 4.

TYPE: int DEFAULT: 4

fetch_k

Number of Documents to fetch to pass to MMR algorithm.

TYPE: int DEFAULT: 20

lambda_mult

Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.

TYPE: float DEFAULT: 0.5

filter

Dictionary of argument(s) to filter on metadata

TYPE: dict | None DEFAULT: None

namespace

Namespace to search in. Default will search in '' namespace.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
list[Document]

List of Documents selected by maximal marginal relevance.

max_marginal_relevance_search

max_marginal_relevance_search(
    query: str,
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[Document]

Return docs selected using the maximal marginal relevance.

Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents.

PARAMETER DESCRIPTION
query

Text to look up documents similar to.

TYPE: str

k

Number of Documents to return. Defaults to 4.

TYPE: int DEFAULT: 4

fetch_k

Number of Documents to fetch to pass to MMR algorithm.

TYPE: int DEFAULT: 20

lambda_mult

Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.

TYPE: float DEFAULT: 0.5

filter

Dictionary of argument(s) to filter on metadata

TYPE: dict | None DEFAULT: None

namespace

Namespace to search in. Default will search in '' namespace.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
list[Document]

List of Documents selected by maximal marginal relevance.

amax_marginal_relevance_search async

amax_marginal_relevance_search(
    query: str,
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: dict | None = None,
    namespace: str | None = None,
    **kwargs: Any,
) -> list[Document]

Async return docs selected using the maximal marginal relevance.

Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents.

PARAMETER DESCRIPTION
query

Text to look up documents similar to.

TYPE: str

k

Number of Document objects to return.

TYPE: int DEFAULT: 4

fetch_k

Number of Document objects to fetch to pass to MMR algorithm.

TYPE: int DEFAULT: 20

lambda_mult

Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity.

TYPE: float DEFAULT: 0.5

**kwargs

Arguments to pass to the search method.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
list[Document]

List of Document objects selected by maximal marginal relevance.

delete

delete(
    ids: list[str] | None = None,
    delete_all: bool | None = None,
    namespace: str | None = None,
    filter: dict | None = None,
    **kwargs: Any,
) -> None

Delete by vector IDs or filter.

Args:
  • ids: List of IDs to delete.
  • delete_all: Whether to delete all vectors in the index.
  • filter: Dictionary of conditions to filter vectors to delete.
  • namespace: Namespace to delete from. By default, the '' namespace is used.

adelete async

adelete(
    ids: list[str] | None = None,
    delete_all: bool | None = None,
    namespace: str | None = None,
    filter: dict | None = None,
    **kwargs: Any,
) -> None

Async delete by vector ID or other criteria.

PARAMETER DESCRIPTION
ids

List of IDs to delete. If None, delete all.

TYPE: list[str] | None DEFAULT: None

**kwargs

Other keyword arguments that subclasses might use.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
None

This method does not return a value.