I am trying to create my first LLM application using open-source models and LlamaIndex. For some reason, although I am not using the OpenAI API, I get the following error:

  File "/apps/anaconda3/envs/llamaindex-test/lib/python3.12/site-packages/openai/_base_client.py", line 988, in _request
    raise self._make_status_error_from_response(err.response) from None
openai.RateLimitError: Error code: 429 - {'error': {'message': 'Your account is not active, please check your billing details on our website.', 'type': 'billing_not_active', 'param': None, 'code': 'billing_not_active'}}

Also, if I remove the OpenAI API key, I get another error saying that I need to set the key, but I don't want to use any OpenAI features, just Hugging Face models.
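From what I understand, LlamaIndex falls back to OpenAI for any component (LLM or embeddings) that has not been overridden, so I assume the global Settings object has to be pointed at the local models. Something like the sketch below is what I think is intended (the model URL is a placeholder), although I am not sure whether this alone is enough to stop the OpenAI calls:

from llama_index.core import Settings
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Register the local LLM and embedding model globally so that nothing
# falls back to the OpenAI defaults (model_url is a placeholder, not a real URL).
Settings.llm = LlamaCPP(
    model_url="url",
    context_window=32000,
    max_new_tokens=1024,
    verbose=True,
)
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-m3")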

The code I have so far:

import os
from pathlib import Path
import openai
import chromadb

from llama_index.core import VectorStoreIndex, ServiceContext, SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Settings
from llama_index.readers.file import CSVReader
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.langchain import LangchainEmbedding


from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from transformers import AutoTokenizer

os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # pin execution to GPU 0
#openai.api_key = 'MY-KEY'

documents = SimpleDirectoryReader("data").load_data()

text_splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=20)

llm = LlamaCPP(
    model_url="url",  # placeholder for the GGUF model URL
    context_window=32000,
    max_new_tokens=1024,
    verbose=True,
)
embedding_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name="BAAI/bge-m3"))


# Register the Zephyr tokenizer globally so token counting matches the model
Settings.tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta").encode

service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model='local:BAAI/bge-m3',
    system_prompt="""PROMPT""",
)

index = VectorStoreIndex.from_documents(documents, show_progress=True)


index.storage_context.persist(persist_dir="llamaindex_vault")

query_engine = index.as_query_engine()
response = query_engine.query("¿Quienes pueden ser beneficiarios de la prestación del ingreso mínimo vital?")
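Since chromadb and ChromaVectorStore are imported but not wired in yet, this is roughly how I assume the Chroma persistence is supposed to look (the path and collection name are placeholders I made up):

from llama_index.core import StorageContext

# Persist the vectors in a local Chroma collection instead of the default
# in-memory store (path and collection name are placeholders).
db = chromadb.PersistentClient(path="llamaindex_vault")
chroma_collection = db.get_or_create_collection("quickstart")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    embed_model=embedding_model,
    show_progress=True,
)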