Nvidia (original) (raw)

A component for embedding documents using embedding models provided by NVIDIA NIMs.

Usage example:

from haystack_integrations.components.embedders.nvidia import NvidiaDocumentEmbedder

doc = Document(content="I love pizza!")

document_embedder = NvidiaDocumentEmbedder(model="NV-Embed-QA", api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia")
document_embedder.warm_up()

result = document_embedder.run([doc])
print(result["documents"][0].embedding)
def __init__(model: Optional[str] = None,
             api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
             api_url: str = os.getenv("NVIDIA_API_URL", DEFAULT_API_URL),
             prefix: str = "",
             suffix: str = "",
             batch_size: int = 32,
             progress_bar: bool = True,
             meta_fields_to_embed: Optional[List[str]] = None,
             embedding_separator: str = "\n",
             truncate: Optional[Union[EmbeddingTruncateMode, str]] = None,
             timeout: Optional[float] = None)

Create a NvidiaTextEmbedder component.

Arguments:

Set default model in local NIM mode.

Initializes the component.

def to_dict() -> Dict[str, Any]

Serializes the component to a dictionary.

Returns:

Dictionary with serialized data.

@property
def available_models() -> List[Model]

Get a list of available models that work with NvidiaDocumentEmbedder.

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "NvidiaDocumentEmbedder"

Deserializes the component from a dictionary.

Arguments:

Returns:

The deserialized component.

@component.output_types(documents=List[Document], meta=Dict[str, Any])
def run(documents: List[Document])

Embed a list of Documents.

The embedding of each Document is stored in the embedding field of the Document.

Arguments:

Raises:

Returns:

A dictionary with the following keys and values:

A component for embedding strings using embedding models provided by NVIDIA NIMs.

For models that differentiate between query and document inputs, this component embeds the input string as a query.

Usage example:

from haystack_integrations.components.embedders.nvidia import NvidiaTextEmbedder

text_to_embed = "I love pizza!"

text_embedder = NvidiaTextEmbedder(model="NV-Embed-QA", api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia")
text_embedder.warm_up()

print(text_embedder.run(text_to_embed))
def __init__(model: Optional[str] = None,
             api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
             api_url: str = os.getenv("NVIDIA_API_URL", DEFAULT_API_URL),
             prefix: str = "",
             suffix: str = "",
             truncate: Optional[Union[EmbeddingTruncateMode, str]] = None,
             timeout: Optional[float] = None)

Create a NvidiaTextEmbedder component.

Arguments:

Set default model in local NIM mode.

Initializes the component.

def to_dict() -> Dict[str, Any]

Serializes the component to a dictionary.

Returns:

Dictionary with serialized data.

@property
def available_models() -> List[Model]

Get a list of available models that work with NvidiaTextEmbedder.

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "NvidiaTextEmbedder"

Deserializes the component from a dictionary.

Arguments:

Returns:

The deserialized component.

@component.output_types(embedding=List[float], meta=Dict[str, Any])
def run(text: str)

Embed a string.

Arguments:

Raises:

Returns:

A dictionary with the following keys and values:

Specifies how inputs to the NVIDIA embedding components are truncated. If START, the input will be truncated from the start. If END, the input will be truncated from the end. If NONE, an error will be returned (if the input is too long).

@classmethod
def from_str(cls, string: str) -> "EmbeddingTruncateMode"

Create a truncate mode from a string.

Arguments:

Returns:

Truncate mode.

Generates text using generative models hosted with NVIDIA NIM on the NVIDIA API Catalog.

from haystack_integrations.components.generators.nvidia import NvidiaGenerator

generator = NvidiaGenerator(
    model="meta/llama3-70b-instruct",
    model_arguments={
        "temperature": 0.2,
        "top_p": 0.7,
        "max_tokens": 1024,
    },
)
generator.warm_up()

result = generator.run(prompt="What is the answer?")
print(result["replies"])
print(result["meta"])
print(result["usage"])

You need an NVIDIA API key for this component to work.

def __init__(model: Optional[str] = None,
             api_url: str = os.getenv("NVIDIA_API_URL", DEFAULT_API_URL),
             api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
             model_arguments: Optional[Dict[str, Any]] = None,
             timeout: Optional[float] = None)

Create a NvidiaGenerator component.

Arguments:

Set default model in local NIM mode.

Initializes the component.

def to_dict() -> Dict[str, Any]

Serializes the component to a dictionary.

Returns:

Dictionary with serialized data.

@property
def available_models() -> List[Model]

Get a list of available models that work with NvidiaGenerator.

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "NvidiaGenerator"

Deserializes the component from a dictionary.

Arguments:

Returns:

Deserialized component.

@component.output_types(replies=List[str], meta=List[Dict[str, Any]])
def run(prompt: str)

Queries the model with the provided prompt.

Arguments:

Returns:

A dictionary with the following keys:

A component for ranking documents using ranking models provided by NVIDIA NIMs.

Usage example:

from haystack_integrations.components.rankers.nvidia import NvidiaRanker
from haystack import Document
from haystack.utils import Secret

ranker = NvidiaRanker(
    model="nvidia/nv-rerankqa-mistral-4b-v3",
    api_key=Secret.from_env_var("NVIDIA_API_KEY"),
)
ranker.warm_up()

query = "What is the capital of Germany?"
documents = [
    Document(content="Berlin is the capital of Germany."),
    Document(content="The capital of Germany is Berlin."),
    Document(content="Germany's capital is Berlin."),
]

result = ranker.run(query, documents, top_k=2)
print(result["documents"])
def __init__(model: Optional[str] = None,
             truncate: Optional[Union[RankerTruncateMode, str]] = None,
             api_url: str = os.getenv("NVIDIA_API_URL", DEFAULT_API_URL),
             api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
             top_k: int = 5,
             query_prefix: str = "",
             document_prefix: str = "",
             meta_fields_to_embed: Optional[List[str]] = None,
             embedding_separator: str = "\n",
             timeout: Optional[float] = None)

Create a NvidiaRanker component.

Arguments:

def to_dict() -> Dict[str, Any]

Serialize the ranker to a dictionary.

Returns:

A dictionary containing the ranker's attributes.

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "NvidiaRanker"

Deserialize the ranker from a dictionary.

Arguments:

Returns:

The deserialized ranker.

Initialize the ranker.

Raises:

@component.output_types(documents=List[Document])
def run(query: str,
        documents: List[Document],
        top_k: Optional[int] = None) -> Dict[str, List[Document]]

Rank a list of documents based on a given query.

Arguments:

Raises:

Returns:

A dictionary containing the ranked documents.

Specifies how inputs to the NVIDIA ranker components are truncated. If NONE, the input will not be truncated and an error returned instead. If END, the input will be truncated from the end.

@classmethod
def from_str(cls, string: str) -> "RankerTruncateMode"

Create a truncate mode from a string.

Arguments:

Returns:

Truncate mode.