I am running a FastChat server to create embeddings:
pip install "fschat[model_worker,api]"
python -m fastchat.serve.controller --host 0.0.0.0
python -m fastchat.serve.model_worker --model-path jphme/em_german_7b_v01 --host 0.0.0.0
python -m fastchat.serve.openai_api_server --host 0.0.0.0 --port 8000
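To sanity-check the server, I can query the OpenAI-compatible embeddings endpoint directly (a minimal check, assuming the API server above is reachable on localhost:8000):

import requests

# Ask the FastChat OpenAI-compatible API for one embedding and
# inspect its dimensionality.
resp = requests.post(
    "http://localhost:8000/v1/embeddings",
    json={"model": "em_german_7b_v01", "input": "Hallo Welt"},
)
resp.raise_for_status()
print(len(resp.json()["data"][0]["embedding"]))  # 4096 for this 7B Llama-based model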
The code below is from this tutorial: https://medium.com/@zhaozhiming/using-llamaindex-with-elasticsearch-for-enhanced-retrieval-augmented-generation-rag-2f37646daeef
The only difference is that I want to use a different model, jphme/em_german_7b_v01:
import os
from typing import Any, List

import requests

from llama_index.core import (
    ServiceContext,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.core.bridge.pydantic import PrivateAttr
from llama_index.core.embeddings import BaseEmbedding
from llama_index.vector_stores.elasticsearch import ElasticsearchStore
class CustomEmbeddings(BaseEmbedding):
    """Custom class for embeddings.

    Args:
        model_name (str): Model for embedding.
        url (str): URL of the embedding server.
    """

    _model_name: str = PrivateAttr()
    _url: str = PrivateAttr()

    def __init__(self, model_name: str, url: str, **kwargs: Any) -> None:
        self._model_name = model_name
        self._url = url
        super().__init__(**kwargs)

    @classmethod
    def class_name(cls) -> str:
        return "custom_embedding"

    # The async variants are declared as coroutines, since BaseEmbedding
    # awaits them.
    async def _aget_query_embedding(self, query: str) -> List[float]:
        return get_embedding(text=query, model_uid=self._model_name, url=self._url)

    async def _aget_text_embedding(self, text: str) -> List[float]:
        return get_embedding(text=text, model_uid=self._model_name, url=self._url)

    def _get_query_embedding(self, query: str) -> List[float]:
        return get_embedding(text=query, model_uid=self._model_name, url=self._url)

    def _get_text_embedding(self, text: str) -> List[float]:
        return get_embedding(text=text, model_uid=self._model_name, url=self._url)

    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        return get_embeddings(
            list_of_text=texts, model_uid=self._model_name, url=self._url
        )
def send_request(model_uid: str, text: str, url: str):
    endpoint = f"{url}/v1/embeddings"
    request_body = {"model": model_uid, "input": text}
    response = requests.post(endpoint, json=request_body)
    if response.status_code != 200:
        raise RuntimeError(
            f"Failed to create the embeddings, detail: {response.text}"
        )
    return response.json()
def get_embedding(text: str, model_uid: str, url: str):
    """Get a single embedding."""
    text = text.replace("\n", " ")
    response_data = send_request(model_uid, text, url)
    return response_data["data"][0]["embedding"]
def get_embeddings(
    list_of_text: List[str], model_uid: str, url: str
):
    """Get embeddings for a batch of texts."""
    assert len(list_of_text) <= 2048, "The batch size should not be larger than 2048."
    list_of_text = [text.replace("\n", " ") for text in list_of_text]
    response_data = send_request(model_uid, list_of_text, url)
    return [d["embedding"] for d in response_data["data"]]
es_url = "http://localhost:9200"
index_name = 'nzz_articles_2'
store = ElasticsearchStore(
es_url=es_url,
index_name=index_name,
)
storage_context = StorageContext.from_defaults(vector_store=store)
embedding_model_url = "http://localhost:8000"
embedding_model_name = "em_german_7b_v01"
os.environ['OPENAI_API_KEY'] = '{your OpenAI API key}'
service_context = ServiceContext.from_defaults(
embed_model=CustomEmbeddings(
url=embedding_model_url, model_name=embedding_model_name
),
)
data_path = "C:\\Users\\swiat\\Documents\\GitHub\\streamlit\\data2" # Directory where paul_graham_essay.txt is located
documents = SimpleDirectoryReader(data_path).load_data()
index = VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
service_context=service_context,
show_progress=True
)
print("Indexing complete")
print(index)
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(f"response: {response}")
When I build the index, I get this error:

elasticsearch.BadRequestError: BadRequestError(400, 'mapper_parsing_exception', 'The number of dimensions for field [embedding] should be in the range [1, 2048] but was [4096]')
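For context, I can reproduce the limit directly against Elasticsearch, outside LlamaIndex (a minimal sketch, assuming ES is on localhost:9200 and the throwaway index name dims_test is free):

import requests

# Try to create a dense_vector field with the model's 4096 dimensions;
# on my Elasticsearch version this fails with the same
# mapper_parsing_exception, while smaller dims are accepted.
mapping = {
    "mappings": {
        "properties": {
            "embedding": {"type": "dense_vector", "dims": 4096}
        }
    }
}
r = requests.put("http://localhost:9200/dims_test", json=mapping)
print(r.status_code, r.json())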
How can I fix it?