I am getting an empty response when querying my vector database using Mixtral-8x7B with LlamaIndex.
I would like to know whether the parameters and arguments are the right way to initialize the Mixtral LLM. Please advise.
def completion_to_prompt(completion):
    """Wrap a raw completion string in Mixtral's [INST] instruction tags."""
    return "[INST] {} [/INST]".format(completion)
def messages_to_prompt(messages):
    """Flatten a sequence of chat messages into one Mixtral [INST] prompt.

    Each message is stringified and the messages are joined with newlines.

    Bug fix: the original joined with the literal two-character string "/n"
    (forward slash + n) instead of the newline escape "\n", so the model saw
    messages glued together with spurious "/n" tokens — a likely contributor
    to degraded/empty responses.
    """
    messages_str = "\n".join(str(m) for m in messages)
    return f"[INST] {messages_str} [/INST] "
# Initialize the LlamaIndex HuggingFace LLM wrapper for a GPTQ-quantized Mixtral.
# NOTE(review): "Mixtral-8x7B-v0.1-GPTQ" is the BASE (non-instruct) checkpoint,
# but the prompts above wrap everything in [INST] ... [/INST] instruction tags.
# The base model was not trained on that format, which is a plausible cause of
# empty responses — consider "TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ" instead
# (verify against the HuggingFace model card).
llm = HuggingFaceLLM(
model_name="TheBloke/Mixtral-8x7B-v0.1-GPTQ",
tokenizer_name="TheBloke/Mixtral-8x7B-v0.1-GPTQ",
# Wraps each query in instruction tags before it reaches the model.
query_wrapper_prompt=PromptTemplate("[INST] {query_str} [/INST]"),
# Max prompt tokens the wrapper will pack in; leaves headroom under the
# model's context limit for generation.
context_window=3900,
max_new_tokens=512,
# tokenizer_kwargs={},
# Sampling config passed through to transformers' generate(); do_sample=True
# is required for temperature/top_k/top_p to take effect.
generate_kwargs={"temperature": 0.1, "top_k": 50, "top_p": 0.7, "do_sample":True},
messages_to_prompt=messages_to_prompt,
completion_to_prompt=completion_to_prompt,
# Let accelerate place model shards across available devices.
device_map="auto",
)