Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Openai compatible embedding #1223

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions camel/embeddings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
from .base import BaseEmbedding
from .mistral_embedding import MistralEmbedding
from .openai_compatible_embedding import OpenAICompatibleEmbedding
from .openai_embedding import OpenAIEmbedding
from .sentence_transformers_embeddings import SentenceTransformerEncoder
from .vlm_embedding import VisionLanguageEmbedding
Expand All @@ -23,4 +24,5 @@
"SentenceTransformerEncoder",
"VisionLanguageEmbedding",
"MistralEmbedding",
"OpenAICompatibleEmbedding",
]
91 changes: 91 additions & 0 deletions camel/embeddings/openai_compatible_embedding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
from __future__ import annotations

import os
from typing import Any, Optional

from openai import OpenAI

from camel.embeddings.base import BaseEmbedding
from camel.utils import api_keys_required


class OpenAICompatibleEmbedding(BaseEmbedding[str]):
    r"""Provides text embedding functionalities supporting OpenAI
    compatibility.

    Args:
        model_type (str): The model type to be used for text embeddings.
        api_key (Optional[str], optional): The API key for authenticating
            with the model service. When omitted, the value of the
            ``OPENAI_COMPATIBILIY_API_KEY`` environment variable is used.
            (default: :obj:`None`)
        url (Optional[str], optional): The url to the model service. When
            omitted, the value of the ``OPENAI_COMPATIBILIY_API_BASE_URL``
            environment variable is used. (default: :obj:`None`)
    """

    def __init__(
        self,
        model_type: str,
        api_key: Optional[str] = None,
        url: Optional[str] = None,
    ) -> None:
        self.model_type = model_type
        # Stays `None` until `embed_list` has received at least one
        # embedding to measure.
        self.output_dim: Optional[int] = None

        # NOTE(review): both environment variable names are spelled
        # "COMPATIBILIY" (missing the second "T"). They are kept as-is
        # because existing tests set these exact names; confirm whether a
        # rename is wanted.
        self._api_key = api_key or os.environ.get(
            "OPENAI_COMPATIBILIY_API_KEY"
        )
        self._url = url or os.environ.get("OPENAI_COMPATIBILIY_API_BASE_URL")
        self._client = OpenAI(
            timeout=60,
            max_retries=3,
            api_key=self._api_key,
            base_url=self._url,
        )

    @api_keys_required("OPENAI_COMPATIBILIY_API_KEY")
    def embed_list(
        self,
        objs: list[str],
        **kwargs: Any,
    ) -> list[list[float]]:
        r"""Generates embeddings for the given texts.

        Args:
            objs (list[str]): The texts for which to generate the embeddings.
            **kwargs (Any): Extra kwargs passed to the embedding API.

        Returns:
            list[list[float]]: One embedding per entry of the response, each
                represented as a list of floating-point numbers. The list is
                empty when the response contains no embeddings.
        """

        response = self._client.embeddings.create(
            input=objs,
            model=self.model_type,
            **kwargs,
        )
        embeddings = [data.embedding for data in response.data]
        # Only record the dimension when something came back: indexing the
        # first element of an empty response would raise `IndexError`.
        if embeddings:
            self.output_dim = len(embeddings[0])
        return embeddings

    def get_output_dim(self) -> int:
        r"""Returns the output dimension of the embeddings.

        Returns:
            int: The dimensionality of the embedding for the current model.

        Raises:
            ValueError: If no embedding has been generated yet, so the
                dimension is still unknown.
        """
        if self.output_dim is None:
            raise ValueError(
                "Output dimension is not yet determined. Call "
                "'embed_list' first."
            )
        return self.output_dim
32 changes: 32 additions & 0 deletions examples/embeddings/openai_compatible_embedding_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========

from camel.embeddings import OpenAICompatibleEmbedding

# Create an embedding client that talks to an OpenAI-compatible endpoint.
nv_embed = OpenAICompatibleEmbedding(
    url="https://integrate.api.nvidia.com/v1",
    api_key="nvapi-xxx",
    model_type="nvidia/nv-embed-v1",
)

# Embed a single sentence and report the size of its vector.
texts = ["What is the capital of France?"]
text_embeddings = nv_embed.embed_list(texts)
first_embedding = text_embeddings[0]

print(len(first_embedding))
'''
===============================================================================
4096
===============================================================================
'''
99 changes: 99 additions & 0 deletions test/embeddings/test_openai_compatible_embedding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========

import unittest
from unittest.mock import MagicMock, patch

from camel.embeddings import OpenAICompatibleEmbedding


class TestOpenAICompatibleEmbedding(unittest.TestCase):
    r"""Unit tests for :obj:`OpenAICompatibleEmbedding` that run against a
    mocked OpenAI client, so no network access is needed.
    """

    def setUp(self):
        # Provide the environment variables the embedding class reads.
        env_patcher = patch.dict(
            "os.environ",
            {
                "OPENAI_COMPATIBILIY_API_KEY": "test_api_key",
                "OPENAI_COMPATIBILIY_API_BASE_URL": "http://test-url.com",
            },
        )
        env_patcher.start()
        self.addCleanup(env_patcher.stop)

        # Patch the name where it is looked up: the module under test does
        # `from openai import OpenAI`, so patching "openai.OpenAI" would
        # leave the already-bound reference untouched.
        client_patcher = patch(
            "camel.embeddings.openai_compatible_embedding.OpenAI"
        )
        self.mock_openai_cls = client_patcher.start()
        self.addCleanup(client_patcher.stop)
        self.mock_client = self.mock_openai_cls.return_value

    @staticmethod
    def _make_response(vectors):
        r"""Builds a mocked embeddings response holding `vectors`."""
        response = MagicMock()
        response.data = [MagicMock(embedding=vector) for vector in vectors]
        return response

    def _make_embedding(self):
        r"""Creates the object under test with explicit credentials."""
        return OpenAICompatibleEmbedding(
            "text-embedding-model", "test_api_key", "http://test-url.com"
        )

    def test_embed_list(self):
        # One vector per input text, mirroring a real embeddings response.
        vectors = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
        self.mock_client.embeddings.create.return_value = (
            self._make_response(vectors)
        )

        embedding = self._make_embedding()

        # The credentials must be forwarded to the client constructor.
        _, client_kwargs = self.mock_openai_cls.call_args
        self.assertEqual(client_kwargs["api_key"], "test_api_key")
        self.assertEqual(client_kwargs["base_url"], "http://test-url.com")

        # Call embed_list method
        input_texts = ["Hello world", "Goodbye world"]
        result = embedding.embed_list(input_texts)

        # Check if the OpenAI client was called with the correct parameters
        self.mock_client.embeddings.create.assert_called_once_with(
            input=input_texts, model="text-embedding-model"
        )

        # Check if the result is as expected
        self.assertEqual(result, vectors)

    def test_get_output_dim(self):
        self.mock_client.embeddings.create.return_value = (
            self._make_response([[0.1, 0.2, 0.3]])
        )

        embedding = self._make_embedding()

        # Call embed_list to generate output_dim
        embedding.embed_list(["Hello world"])

        # Now test get_output_dim
        self.assertEqual(embedding.get_output_dim(), 3)

    def test_get_output_dim_without_embeddings(self):
        # Test if ValueError is raised when get_output_dim is called before
        # embed_list
        embedding = self._make_embedding()

        with self.assertRaises(ValueError) as context:
            embedding.get_output_dim()

        self.assertEqual(
            str(context.exception),
            "Output dimension is not yet determined. Call 'embed_list' first.",
        )