Commit cc937cb

[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

pre-commit-ci[bot] committed Jun 17, 2024
1 parent c05b70b commit cc937cb
Showing 6 changed files with 39 additions and 32 deletions.
4 changes: 4 additions & 0 deletions comps/embeddings/langchain-mosec/README.md
@@ -1,20 +1,24 @@
# Build the Mosec Docker image

```
cd docker
docker build -t mosec:latest .
```

# Launch the Mosec Docker container

```
docker run -p $your_port:8000 -v ./data:/data --name mosec_server mosec:latest
```

# Launch the microservice (port 6000 by default)

```
OPENAI_API_BASE=http://localhost:$your_port python embedding_mosec.py
```

# Run a client test

```
curl localhost:6000/v1/embeddings \
-X POST \
```
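The rest of this command is cut off in the diff view. A plausible full request, assuming the `{"text": ...}` payload of the microservice's `TextDoc` input (field name assumed from `embedding_mosec.py` below):

```shell
curl http://localhost:6000/v1/embeddings \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"text": "Hello, world!"}'
```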
3 changes: 3 additions & 0 deletions comps/embeddings/langchain-mosec/docker/Dockerfile
@@ -1,3 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
From ubuntu:22.04
ARG DEBIAN_FRONTEND=noninteractive

11 changes: 9 additions & 2 deletions comps/embeddings/langchain-mosec/docker/README.md
@@ -1,32 +1,39 @@
# Embedding Server

## 1. Introduction

This service exposes an OpenAI-compatible RESTful API for extracting text features (embeddings).
It is designed to run on Intel Xeon processors to accelerate embedding model serving.
The current local model is BGE-large-zh.

## 2. Quick Start

### 2.1 Build Docker image

```shell
docker build -t embedding:latest .
```

### 2.2 Launch server

```shell
docker run -itd -p 8000:8000 embedding:latest
```

### 2.3 Client test

- Restful API by curl

```shell
curl -X POST http://127.0.0.1:8000/v1/embeddings -H "Content-Type: application/json" -d '{ "model": "/root/bge-large-zh/", "input": "hello world"}'
```

- Generate embeddings from Python

```python
from openai import Client  # needed for the example below

DEFAULT_MODEL = "/root/bge-large-zh/"
-SERVICE_URL="http://127.0.0.1:8000"
-INPUT_STR="Hello world!"
+SERVICE_URL = "http://127.0.0.1:8000"
+INPUT_STR = "Hello world!"

client = Client(api_key="fake", base_url=SERVICE_URL)
emb = client.embeddings.create(model=DEFAULT_MODEL, input=INPUT_STR)  # completion assumed; the diff view truncates here
```
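As a follow-up, the returned object can be inspected through the standard OpenAI client response fields (a sketch; field names assumed from the OpenAI embeddings schema this server emulates):

```python
print(len(emb.data[0].embedding))  # dimensionality of the returned embedding
print(emb.usage.prompt_tokens)  # token count reported by the server
```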
29 changes: 11 additions & 18 deletions comps/embeddings/langchain-mosec/docker/server-ipex.py
@@ -1,17 +1,18 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import base64
import os
from typing import List, Union

+import intel_extension_for_pytorch as ipex
import numpy as np
import torch # type: ignore
import torch.nn.functional as F # type: ignore
import transformers # type: ignore
from llmspec import EmbeddingData, EmbeddingRequest, EmbeddingResponse, TokenUsage

from mosec import ClientError, Runtime, Server, Worker

-import intel_extension_for_pytorch as ipex

DEFAULT_MODEL = "/root/bge-large-zh/"


@@ -20,9 +21,7 @@ def __init__(self):
        self.model_name = os.environ.get("EMB_MODEL", DEFAULT_MODEL)
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_name)
        self.model = transformers.AutoModel.from_pretrained(self.model_name)
-        self.device = (
-            torch.cuda.current_device() if torch.cuda.is_available() else "cpu"
-        )
+        self.device = torch.cuda.current_device() if torch.cuda.is_available() else "cpu"

        self.model = self.model.to(self.device)
        self.model.eval()
@@ -39,25 +38,19 @@ def __init__(self):
        self.model = torch.jit.freeze(self.model)
        self.model(d, t, m)

-    def get_embedding_with_token_count(
-        self, sentences: Union[str, List[Union[str, List[int]]]]
-    ):
+    def get_embedding_with_token_count(self, sentences: Union[str, List[Union[str, List[int]]]]):
        # Mean Pooling - Take attention mask into account for correct averaging
        def mean_pooling(model_output, attention_mask):
            # First element of model_output contains all token embeddings
-            token_embeddings = model_output['last_hidden_state']
-            input_mask_expanded = (
-                attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
-            )
+            token_embeddings = model_output["last_hidden_state"]
+            input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
            return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
                input_mask_expanded.sum(1), min=1e-9
            )

        # Tokenize sentences
        # TODO: support `List[List[int]]` input
-        encoded_input = self.tokenizer(
-            sentences, padding=True, truncation=True, return_tensors="pt"
-        )
+        encoded_input = self.tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")
        inputs = encoded_input.to(self.device)
        token_count = inputs["attention_mask"].sum(dim=1).tolist()
        # Compute token embeddings
@@ -92,12 +85,12 @@ def forward(self, data: List[EmbeddingRequest]) -> List[EmbeddingResponse]:
        resp = []
        emb_idx = 0
        for lens in inputs_lens:
-            token_count = sum(token_cnt[emb_idx:emb_idx+lens])
+            token_count = sum(token_cnt[emb_idx : emb_idx + lens])
            resp.append(
                EmbeddingResponse(
                    data=[
                        EmbeddingData(embedding=emb, index=i)
-                        for i, emb in enumerate(embeddings[emb_idx:emb_idx+lens])
+                        for i, emb in enumerate(embeddings[emb_idx : emb_idx + lens])
                    ],
                    model=self.model_name,
                    usage=TokenUsage(
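For context, the mean-pooling helper in the diff above averages token embeddings over real tokens only, using the attention mask to zero out padding. A minimal standalone sketch of the same computation with toy tensors (all values illustrative):

```python
import torch

# One sentence, four token positions, embedding dim 3; the last two positions are padding.
token_embeddings = torch.tensor([[[1.0, 2.0, 3.0], [3.0, 4.0, 5.0], [9.0, 9.0, 9.0], [9.0, 9.0, 9.0]]])
attention_mask = torch.tensor([[1, 1, 0, 0]])  # 1 = real token, 0 = padding

# Same steps as mean_pooling() above: expand the mask, sum masked embeddings, divide by token count.
mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
pooled = torch.sum(token_embeddings * mask, 1) / torch.clamp(mask.sum(1), min=1e-9)
print(pooled)  # tensor([[2., 3., 4.]]) -- padding positions did not skew the average
```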
6 changes: 4 additions & 2 deletions comps/embeddings/langchain-mosec/docker/test-embedding.py
@@ -1,10 +1,12 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
"""OpenAI embedding client example."""

from openai import Client

DEFAULT_MODEL = "/root/bge-large-zh/"
-SERVICE_URL="http://127.0.0.1:8000"
-INPUT_STR="Hello world!"
+SERVICE_URL = "http://127.0.0.1:8000"
+INPUT_STR = "Hello world!"

client = Client(api_key="fake", base_url=SERVICE_URL)
emb = client.embeddings.create(model=DEFAULT_MODEL, input=INPUT_STR)  # completion assumed; the diff view truncates here
18 changes: 8 additions & 10 deletions comps/embeddings/langchain-mosec/embedding_mosec.py
@@ -3,10 +3,10 @@

import os
import time

from typing import List, Optional
-from langsmith import traceable

from langchain_community.embeddings import OpenAIEmbeddings
+from langsmith import traceable

from comps import (
    EmbedDoc768,
@@ -18,15 +18,14 @@
    statistics_dict,
)


class MosecEmbeddings(OpenAIEmbeddings):
    def _get_len_safe_embeddings(
        self, texts: List[str], *, engine: str, chunk_size: Optional[int] = None
    ) -> List[List[float]]:
        _chunk_size = chunk_size or self.chunk_size
        batched_embeddings: List[List[float]] = []
-        response = self.client.create(
-            input=texts, **self._invocation_params
-        )
+        response = self.client.create(input=texts, **self._invocation_params)
        if not isinstance(response, dict):
            response = response.model_dump()
        batched_embeddings.extend(r["embedding"] for r in response["data"])
@@ -36,16 +35,15 @@ def _get_len_safe_embeddings(
        def empty_embedding() -> List[float]:
            nonlocal _cached_empty_embedding
            if _cached_empty_embedding is None:
-                average_embedded = self.client.create(
-                    input="", **self._invocation_params
-                )
+                average_embedded = self.client.create(input="", **self._invocation_params)
                if not isinstance(average_embedded, dict):
                    average_embedded = average_embedded.model_dump()
                _cached_empty_embedding = average_embedded["data"][0]["embedding"]
            return _cached_empty_embedding

        return [e if e is not None else empty_embedding() for e in batched_embeddings]


@register_microservice(
    name="opea_service@embedding_mosec",
    service_type=ServiceType.EMBEDDING,
@@ -70,7 +68,7 @@ def embedding(input: TextDoc) -> EmbedDoc768:
MOSEC_API_BASE = os.environ.get("MOSEC_API_BASE", "http://127.0.0.1:8080")
os.environ["OPENAI_API_BASE"] = MOSEC_API_BASE
os.environ["OPENAI_API_KEY"] = "Dummy key"
-MODEL_ID="/root/bge-large-zh"
+MODEL_ID = "/root/bge-large-zh"
embeddings = MosecEmbeddings(model=MODEL_ID)
print("Mosec Embedding initialized.")
opea_microservices["opea_service@embedding_mosec"].start()
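For reference, a minimal sketch of exercising the MosecEmbeddings wrapper defined above directly from Python, assuming a Mosec server is already running at the default MOSEC_API_BASE (endpoint and model path taken from this file; the query string is illustrative):

```python
import os

# Point the OpenAI-compatible client at the local Mosec server (see docker/README.md).
os.environ["OPENAI_API_BASE"] = "http://127.0.0.1:8080"
os.environ["OPENAI_API_KEY"] = "Dummy key"

embeddings = MosecEmbeddings(model="/root/bge-large-zh")
vec = embeddings.embed_query("What is deep learning?")
print(len(vec))  # dimensionality of the embedding vector
```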
