How to change default embedding vector size of a Llama 2 based model?


I am running a FastChat server to create embeddings:

pip install "fschat[model_worker,api]"
python -m fastchat.serve.controller --host 0.0.0.0
python -m fastchat.serve.model_worker --model-path jphme/em_german_7b_v01 --host 0.0.0.0
python -m fastchat.serve.openai_api_server --host 0.0.0.0 --port 8000
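
To confirm the vector size the server actually returns, I query the embeddings endpoint directly (a minimal sanity check, assuming the OpenAI-compatible API server started above is reachable on localhost:8000):

import requests

# Request one embedding from the OpenAI-compatible endpoint and inspect its length.
response = requests.post(
    "http://localhost:8000/v1/embeddings",
    json={"model": "em_german_7b_v01", "input": "test"},
)
response.raise_for_status()
embedding = response.json()["data"][0]["embedding"]
print(len(embedding))  # prints 4096 for this model (the Llama 2 7B hidden size)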

The code below is from this tutorial: https://medium.com/@zhaozhiming/using-llamaindex-with-elasticsearch-for-enhanced-retrieval-augmented-generation-rag-2f37646daeef

The only difference is that I want to use a different model, namely jphme/em_german_7b_v01.

from llama_index.core.embeddings import BaseEmbedding
from llama_index.core.bridge.pydantic import PrivateAttr
from llama_index.vector_stores.elasticsearch import ElasticsearchStore
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from typing import Any, List
import requests
import os
from llama_index.core import ServiceContext
from llama_index.core import SimpleDirectoryReader

class CustomEmbeddings(BaseEmbedding):
    """Custom embeddings class that calls the FastChat OpenAI-compatible API.

    Args:
        model_name (str): Model to use for embedding.
        url (str): URL of the embedding server.
    """

    _model_name: str = PrivateAttr()
    _url: str = PrivateAttr()

    def __init__(self, model_name: str, url: str, **kwargs: Any) -> None:
        # Initialize the pydantic base model first, then set private attributes.
        super().__init__(**kwargs)
        self._model_name = model_name
        self._url = url

    @classmethod
    def class_name(cls) -> str:
        return "custom_embedding"

    # The _aget_* methods are the async variants; delegate to the sync ones.
    async def _aget_query_embedding(self, query: str) -> List[float]:
        return self._get_query_embedding(query)

    async def _aget_text_embedding(self, text: str) -> List[float]:
        return self._get_text_embedding(text)

    def _get_query_embedding(self, query: str) -> List[float]:
        return get_embedding(text=query, model_uid=self._model_name, url=self._url)

    def _get_text_embedding(self, text: str) -> List[float]:
        return get_embedding(text=text, model_uid=self._model_name, url=self._url)

    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        return get_embeddings(
            list_of_text=texts, model_uid=self._model_name, url=self._url
        )

def send_request(model_uid: str, text, url: str):
    """Send an embedding request to the OpenAI-compatible endpoint."""
    endpoint = f"{url}/v1/embeddings"
    request_body = {"model": model_uid, "input": text}
    response = requests.post(endpoint, json=request_body)
    if response.status_code != 200:
        raise RuntimeError(
            f"Failed to create the embeddings, detail: {response.text}"
        )
    return response.json()

def get_embedding(text: str, model_uid: str, url: str) -> List[float]:
    """Get a single embedding."""
    text = text.replace("\n", " ")
    response_data = send_request(model_uid, text, url)
    return response_data["data"][0]["embedding"]

def get_embeddings(
    list_of_text: List[str], model_uid: str, url: str
) -> List[List[float]]:
    """Get embeddings for a batch of texts."""
    assert len(list_of_text) <= 2048, "The batch size should not be larger than 2048."
    list_of_text = [text.replace("\n", " ") for text in list_of_text]
    response_data = send_request(model_uid, list_of_text, url)
    return [d["embedding"] for d in response_data["data"]]


es_url = "http://localhost:9200"
index_name = "nzz_articles_2"
store = ElasticsearchStore(
    es_url=es_url,
    index_name=index_name,
)
storage_context = StorageContext.from_defaults(vector_store=store)

embedding_model_url = "http://localhost:8000"
embedding_model_name = "em_german_7b_v01"
os.environ["OPENAI_API_KEY"] = "{your OpenAI API key}"

service_context = ServiceContext.from_defaults(
    embed_model=CustomEmbeddings(
        url=embedding_model_url, model_name=embedding_model_name
    ),
)

data_path = "C:\\Users\\swiat\\Documents\\GitHub\\streamlit\\data2" # Directory where paul_graham_essay.txt is located
documents = SimpleDirectoryReader(data_path).load_data()


index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    service_context=service_context,
    show_progress=True
)

print("Indexing complete")
print(index)


query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(f"response: {response}")

When indexing, I am getting this error:

elasticsearch.BadRequestError: BadRequestError(400, 'mapper_parsing_exception', 'The number of dimensions for field [embedding] should be in the range [1, 2048] but was [4096]')
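
So the model returns 4096-dimensional vectors (the Llama 2 7B hidden size), while my Elasticsearch version only accepts dense_vector fields with up to 2048 dimensions, as the error says. The only workaround I could come up with is truncating the vectors before indexing; a rough sketch (I am not sure naive truncation is acceptable for retrieval quality):

def get_embedding(text: str, model_uid: str, url: str) -> List[float]:
    """Get an embedding, truncated to a size Elasticsearch accepts."""
    text = text.replace("\n", " ")
    response_data = send_request(model_uid, text, url)
    # Naively keep only the first 2048 of the 4096 components. The batch
    # helper get_embeddings would need the same truncation, so that query
    # and document vectors stay consistent.
    return response_data["data"][0]["embedding"][:2048]

But that discards half of every vector, which seems wrong.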

How can I fix it?
