Skip to content

Commit

Permalink
[LLM] Add TransformersBgeEmbeddings class in `bigdl.llm.langchain.e…
Browse files Browse the repository at this point in the history
…mbeddings` (#10459)

* Add TransformersBgeEmbeddings class in bigdl.llm.langchain.embeddings

* Small fixes
  • Loading branch information
Oscilloscope98 authored Mar 19, 2024
1 parent 86b55fc commit 84f3e99
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
5 changes: 3 additions & 2 deletions python/llm/src/bigdl/llm/langchain/embeddings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,14 @@
# only search the first bigdl package and end up finding only one sub-package.

from .bigdlllm import *
from .transformersembeddings import TransformersEmbeddings
from .transformersembeddings import TransformersEmbeddings, TransformersBgeEmbeddings

__all__ = [
"BigdlNativeEmbeddings",
"LlamaEmbeddings",
"BloomEmbeddings",
"GptneoxEmbeddings",
"StarcoderEmbeddings",
"TransformersEmbeddings"
"TransformersEmbeddings",
"TransformersBgeEmbeddings"
]
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
# THE SOFTWARE.

"""Wrapper around BigdlLLM embedding models."""
import torch
from typing import Any, Dict, List, Optional
import numpy as np

Expand Down Expand Up @@ -181,3 +182,14 @@ def embed_query(self, text: str) -> List[float]:
text = text.replace("\n", " ")
embedding = self.embed(text, **self.encode_kwargs)
return embedding.tolist()

# Override `embed` to fit the specific encode method used by
# langchain.embeddings.HuggingFaceBgeEmbeddings.
# TODO: directly support HuggingFaceBgeEmbeddings
class TransformersBgeEmbeddings(TransformersEmbeddings):
    """TransformersEmbeddings variant with first-position, L2-normalized output.

    Only ``embed`` is overridden: it selects index 0 along dim 1 of the
    model's first output and L2-normalizes it with
    ``torch.nn.functional.normalize``.
    """

    def embed(self, text: str, **kwargs):
        """Return the L2-normalized first-position embedding of ``text``.

        Args:
            text: The string to embed.
            **kwargs: Extra keyword arguments forwarded to
                ``self.tokenizer.encode``.

        Returns:
            The first row of the normalized output, as a CPU ``torch.Tensor``.
            NOTE(review): presumably 1-D of size hidden_dim, assuming the
            model's first output is (batch, seq, hidden) -- confirm.
        """
        input_ids = self.tokenizer.encode(text, return_tensors="pt", **kwargs)
        input_ids = input_ids.to(self.model.device)
        # Inference only: disable autograd so no gradient graph is built or
        # kept alive by the returned embedding.
        with torch.no_grad():
            model_output = self.model(input_ids, return_dict=False)[0].cpu()
        # Keep index 0 along dim 1 (presumably the [CLS] position -- confirm),
        # then scale each row to unit L2 norm.
        embeddings = torch.nn.functional.normalize(model_output[:, 0], p=2, dim=1)
        return embeddings[0]

0 comments on commit 84f3e99

Please sign in to comment.