Skip to content

Embeddings

pgraf.embeddings

Engine

Bases: Enum

Enum for the available embedding engines

Source code in pgraf/embeddings.py
class Engine(enum.Enum):
    """Identifies which embedding backend to use."""

    HUGGING_FACE = 'hugging-face'
    OPENAI = 'openai'

HuggingFace

Handles the generation of vector embeddings for text content.

This class provides functionality to convert text into vector embeddings using sentence transformers. It handles chunking of text to ensure optimal embedding generation.

Parameters:

Name Type Description Default
model str | None

The sentence transformer model to use for embeddings

DEFAULT_HUGGING_FACE_MODEL
Source code in pgraf/embeddings.py
class HuggingFace:
    """Handles the generation of vector embeddings for text content.

    Converts text into vector embeddings with a sentence transformer
    model. Input text is chunked before encoding so each piece stays
    at a size suitable for embedding generation.

    Args:
        model: The sentence transformer model to use for embeddings
    """

    def __init__(self, model: str | None = DEFAULT_HUGGING_FACE_MODEL) -> None:
        """Initialize the embeddings generator with the specified model.

        Args:
            model: The sentence transformer model to use
                (defaults to 'all-MiniLM-L6-v2')
        """
        # Fall back to the default when None (or empty) is passed in.
        name = model if model else DEFAULT_HUGGING_FACE_MODEL
        self.transformer = sentence_transformers.SentenceTransformer(name)

    def get(self, value: str) -> list[numpy.ndarray]:
        """Generate embeddings for the provided text value.

        The text is automatically chunked into manageable pieces
        using sentence boundaries and maximum word count.

        Args:
            value: The text to generate embeddings for

        Returns:
            A list of numpy arrays containing the embeddings for each chunk
        """
        # One embedding per chunk, returned in chunk order.
        return [
            self.transformer.encode(
                piece, convert_to_numpy=True, convert_to_tensor=False
            )
            for piece in _chunk_text(value)
        ]

__init__(model=DEFAULT_HUGGING_FACE_MODEL)

Initialize the embeddings generator with the specified model.

Parameters:

Name Type Description Default
model str | None

The sentence transformer model to use (defaults to 'all-MiniLM-L6-v2')

DEFAULT_HUGGING_FACE_MODEL
Source code in pgraf/embeddings.py
def __init__(self, model: str | None = DEFAULT_HUGGING_FACE_MODEL) -> None:
    """Initialize the embeddings generator with the specified model.

    Args:
        model: The sentence transformer model to use
            (defaults to 'all-MiniLM-L6-v2')
    """
    # An explicit None (or empty string) falls back to the default model.
    chosen = model or DEFAULT_HUGGING_FACE_MODEL
    self.transformer = sentence_transformers.SentenceTransformer(chosen)

get(value)

Generate embeddings for the provided text value.

The text is automatically chunked into manageable pieces using sentence boundaries and maximum word count.

Parameters:

Name Type Description Default
value str

The text to generate embeddings for

required

Returns:

Type Description
list[ndarray]

A list of numpy arrays containing the embeddings for each chunk

Source code in pgraf/embeddings.py
def get(self, value: str) -> list[numpy.ndarray]:
    """Generate embeddings for the provided text value.

    The text is automatically chunked into manageable pieces
    using sentence boundaries and maximum word count.

    Args:
        value: The text to generate embeddings for

    Returns:
        A list of numpy arrays containing the embeddings for each chunk
    """
    # Encode every chunk independently; order follows _chunk_text.
    return [
        self.transformer.encode(
            piece, convert_to_numpy=True, convert_to_tensor=False
        )
        for piece in _chunk_text(value)
    ]

OpenAI

Handles the generation of vector embeddings for text content using the OpenAI client.

Source code in pgraf/embeddings.py
class OpenAI:
    """Handles the generation of vector embeddings for text content using the
    OpenAI client
    """

    def __init__(
        self,
        model: str | None = DEFAULT_OPENAI_MODEL,
        api_key: str | None = None,
    ) -> None:
        """Initialize the embeddings generator with the specified model."""
        # None (or empty) model falls back to the package default.
        self.model = model if model else DEFAULT_OPENAI_MODEL
        self.client = openai.OpenAI(api_key=api_key)

    def get(self, value: str) -> list[numpy.ndarray]:
        """Generate embeddings for the provided text value.

        The text is automatically chunked into manageable pieces
        using sentence boundaries and maximum word count.

        Args:
            value: The text to generate embeddings for

        Returns:
            A list of numpy arrays containing the embeddings for each chunk
        """
        # One API call per chunk; only the first embedding of each
        # response is used since a single input is sent each time.
        results: list[numpy.ndarray] = []
        for piece in _chunk_text(value):
            reply = self.client.embeddings.create(
                input=piece, model=self.model
            )
            results.append(numpy.array(reply.data[0].embedding))
        return results

__init__(model=DEFAULT_OPENAI_MODEL, api_key=None)

Initialize the embeddings generator with the specified model.

Source code in pgraf/embeddings.py
def __init__(
    self,
    model: str | None = DEFAULT_OPENAI_MODEL,
    api_key: str | None = None,
) -> None:
    """Initialize the embeddings generator with the specified model."""
    # Resolve the model first; None falls back to the default.
    self.model = model if model else DEFAULT_OPENAI_MODEL
    self.client = openai.OpenAI(api_key=api_key)

get(value)

Generate embeddings for the provided text value.

The text is automatically chunked into manageable pieces using sentence boundaries and maximum word count.

Parameters:

Name Type Description Default
value str

The text to generate embeddings for

required

Returns:

Type Description
list[ndarray]

A list of numpy arrays containing the embeddings for each chunk

Source code in pgraf/embeddings.py
def get(self, value: str) -> list[numpy.ndarray]:
    """Generate embeddings for the provided text value.

    The text is automatically chunked into manageable pieces
    using sentence boundaries and maximum word count.

    Args:
        value: The text to generate embeddings for

    Returns:
        A list of numpy arrays containing the embeddings for each chunk
    """
    # One request per chunk; each response carries a single embedding
    # because a single input string is submitted per call.
    return [
        numpy.array(
            self.client.embeddings.create(
                input=piece, model=self.model
            ).data[0].embedding
        )
        for piece in _chunk_text(value)
    ]