Source code for langchain_postgres.v2.indexes

"""Index class to add vector indexes on the PGVectorStore.

Learn more about vector indexes at https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing
"""

import enum
import re
import warnings
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class StrategyMixin:
    """Record of the three SQL-side names that go with one distance metric.

    Mixed into ``DistanceStrategy`` so that every enum member exposes these
    values as plain attributes.

    Attributes:
        operator: Distance operator symbol, e.g. ``"<->"``.
        search_function: Name of the distance function, e.g. ``"l2_distance"``.
        index_function: Name returned by ``BaseIndex.get_index_function`` for
            use when building an index, e.g. ``"vector_l2_ops"``.
    """

    operator: str
    search_function: str
    index_function: str
class DistanceStrategy(StrategyMixin, enum.Enum):
    """Supported distance metrics.

    Each member's value is an ``(operator, search_function, index_function)``
    triple; through ``StrategyMixin`` the three parts are available as the
    attributes of the same names.
    """

    # L2 / Euclidean distance.
    EUCLIDEAN = "<->", "l2_distance", "vector_l2_ops"
    # Cosine distance.
    COSINE_DISTANCE = "<=>", "cosine_distance", "vector_cosine_ops"
    # Inner product.
    INNER_PRODUCT = "<#>", "inner_product", "vector_ip_ops"
# Distance metric used when the caller does not choose one.
DEFAULT_DISTANCE_STRATEGY: DistanceStrategy = DistanceStrategy.COSINE_DISTANCE

# Not referenced within this module; presumably combined with a table name by
# callers to build a default index name -- verify against the vector store code.
DEFAULT_INDEX_NAME_SUFFIX: str = "langchainvectorindex"
def validate_identifier(identifier: str) -> None:
    """Check that ``identifier`` is a simple ASCII identifier.

    An accepted identifier starts with a letter or underscore and continues
    with letters, digits, or underscores only.

    Args:
        identifier: The candidate identifier string.

    Raises:
        ValueError: If ``identifier`` is empty, starts with a digit, or
            contains any other character (including a trailing newline).
    """
    # fullmatch must consume the whole string. The previous
    # ``re.match(r"^...$")`` form let "name\n" through, because ``$`` also
    # matches just before a final newline.
    if re.fullmatch(r"[a-zA-Z_][a-zA-Z0-9_]*", identifier) is None:
        raise ValueError(
            f"Invalid identifier: {identifier}. Identifiers must start with a letter or underscore, and subsequent characters can be letters, digits, or underscores."
        )
@dataclass
class BaseIndex(ABC):
    """Common configuration shared by every vector index definition.

    Subclasses supply a concrete ``index_type`` default and implement
    ``index_options``.

    Attributes:
        name (Optional[str]): A human-readable name for the index.
            Defaults to None.
        index_type (str): A string identifying the type of index.
            Defaults to "base". Validated as an identifier on construction.
        distance_strategy (DistanceStrategy): The strategy used to calculate
            distances between vectors in the index.
            Defaults to DistanceStrategy.COSINE_DISTANCE.
        partial_indexes (Optional[list[str]]): A list of names of partial
            indexes. Defaults to None. Not consumed inside this module.
        extension_name (Optional[str]): The name of the extension to be
            created for the index, if any. Defaults to None. Validated as an
            identifier on construction when set.
    """

    name: Optional[str] = None
    index_type: str = "base"
    # NOTE(review): a factory is used instead of a plain default, likely
    # because the dataclass mixin leaves enum members unhashable and
    # dataclasses rejects unhashable defaults -- confirm before simplifying.
    distance_strategy: DistanceStrategy = field(
        default_factory=lambda: DistanceStrategy.COSINE_DISTANCE
    )
    partial_indexes: Optional[list[str]] = None
    extension_name: Optional[str] = None

    @abstractmethod
    def index_options(self) -> str:
        """Return the index option string for this index.

        Raises:
            NotImplementedError: Always, when the base implementation is
                invoked directly.
        """
        raise NotImplementedError(
            "index_options method must be implemented by subclass"
        )

    def get_index_function(self) -> str:
        """Return the ``index_function`` of the configured distance strategy."""
        strategy = self.distance_strategy
        return strategy.index_function

    def __post_init__(self) -> None:
        """Validate the identifier-like fields after dataclass initialisation.

        Empty or ``None`` values are skipped.

        Raises:
            ValueError: If ``extension_name`` or ``index_type`` is set but is
                not a valid identifier according to ``validate_identifier``.
        """
        # Order matters for which error surfaces first: extension, then type.
        for candidate in (self.extension_name, self.index_type):
            if candidate:
                validate_identifier(candidate)
@dataclass
class ExactNearestNeighbor(BaseIndex):
    """Index definition whose ``index_type`` is ``"exactnearestneighbor"``.

    NOTE(review): judging by the name, this stands for searching without an
    approximate index -- confirm against the code that applies indexes.
    """

    index_type: str = "exactnearestneighbor"

    def index_options(self) -> str:
        """Return an empty option string, as this type defines no options.

        ``BaseIndex.index_options`` is abstract; without this override the
        class would itself remain abstract and ``ExactNearestNeighbor()``
        would raise ``TypeError``.
        """
        return ""
@dataclass
class QueryOptions(ABC):
    """Abstract interface for per-query index settings.

    Concrete subclasses hold the option values as dataclass fields and
    render them through the two methods below.
    """

    @abstractmethod
    def to_parameter(self) -> list[str]:
        """Render the options as a list of configuration strings.

        Raises:
            NotImplementedError: Always, when the base implementation is
                invoked directly.
        """
        raise NotImplementedError("to_parameter method must be implemented by subclass")

    @abstractmethod
    def to_string(self) -> str:
        """Render the options as a single string.

        Raises:
            NotImplementedError: Always, when the base implementation is
                invoked directly.
        """
        raise NotImplementedError("to_string method must be implemented by subclass")
@dataclass
class HNSWIndex(BaseIndex):
    """Index definition with ``index_type`` ``"hnsw"``.

    Attributes:
        m (int): Value emitted for the ``m`` index option. Defaults to 16.
        ef_construction (int): Value emitted for the ``ef_construction``
            index option. Defaults to 64.
    """

    index_type: str = "hnsw"
    m: int = 16
    ef_construction: int = 64

    def index_options(self) -> str:
        """Return the options as ``"(m = <m>, ef_construction = <ef_construction>)"``."""
        settings = (("m", self.m), ("ef_construction", self.ef_construction))
        rendered = ", ".join(f"{key} = {value}" for key, value in settings)
        return f"({rendered})"
@dataclass
class HNSWQueryOptions(QueryOptions):
    """Query-time settings for an HNSW index.

    Attributes:
        ef_search (int): Value emitted for the ``hnsw.ef_search`` setting.
            Defaults to 40.
    """

    ef_search: int = 40

    def to_parameter(self) -> list[str]:
        """Return the settings as a one-element list: ``["hnsw.ef_search = <ef_search>"]``."""
        return [f"hnsw.ef_search = {self.ef_search}"]

    def to_string(self) -> str:
        """Return the settings as the string ``"hnsw.ef_search = <ef_search>"``.

        Deprecated in favour of ``to_parameter``; emits a
        ``DeprecationWarning`` on every call.
        """
        warnings.warn(
            "to_string is deprecated, use to_parameter instead.",
            DeprecationWarning,
            # Attribute the warning to the caller rather than this module, so
            # that default warning filters can surface it to the code that
            # needs to migrate.
            stacklevel=2,
        )
        return f"hnsw.ef_search = {self.ef_search}"
@dataclass
class IVFFlatIndex(BaseIndex):
    """Index definition with ``index_type`` ``"ivfflat"``.

    Attributes:
        lists (int): Value emitted for the ``lists`` index option.
            Defaults to 100.
    """

    index_type: str = "ivfflat"
    lists: int = 100

    def index_options(self) -> str:
        """Return the options as ``"(lists = <lists>)"``."""
        template = "(lists = {})"
        return template.format(self.lists)
@dataclass
class IVFFlatQueryOptions(QueryOptions):
    """Query-time settings for an IVFFlat index.

    Attributes:
        probes (int): Value emitted for the ``ivfflat.probes`` setting.
            Defaults to 1.
    """

    probes: int = 1

    def to_parameter(self) -> list[str]:
        """Return the settings as a one-element list: ``["ivfflat.probes = <probes>"]``."""
        return [f"ivfflat.probes = {self.probes}"]

    def to_string(self) -> str:
        """Return the settings as the string ``"ivfflat.probes = <probes>"``.

        Deprecated in favour of ``to_parameter``; emits a
        ``DeprecationWarning`` on every call.
        """
        warnings.warn(
            "to_string is deprecated, use to_parameter instead.",
            DeprecationWarning,
            # Attribute the warning to the caller rather than this module, so
            # that default warning filters can surface it to the code that
            # needs to migrate.
            stacklevel=2,
        )
        return f"ivfflat.probes = {self.probes}"