create a langchain response in a specific language

321 Views Asked by At

I'm making a chatbot to answer questions. The input is a PDF in Indonesian and I want answers in Indonesian too. However, when I created a prompt to answer in Indonesian, the answer given was incomplete. How do I ensure the response is complete, and how do I maintain consistency in the answers? Sometimes even though it has been explained in the prompt to answer only in Indonesian, the response still answers in English.

!pip install langchain
!pip install "unstructured[all-docs]"
!sudo apt-get install libmagic-dev poppler-utils tesseract-ocr libxml2 libxslt-dev
import nltk
nltk.download('punkt')
!pip install InstructorEmbedding
!pip install sentence-transformers
!pip install faiss-cpu
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
import os
from langchain.llms import HuggingFaceHub
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts.chat import ChatPromptTemplate

loader = UnstructuredFileLoader("/content/ASIP4204-M1.pdf")
docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
texts = text_splitter.split_documents(documents=docs)
embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.from_documents(documents=texts, embedding=embeddings)
retriever = db.as_retriever()
os.environ["HUGGINGFACEHUB_API_TOKEN"] = ""
repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
llm = HuggingFaceHub(
    repo_id=repo_id, model_kwargs={"temperature": 0.1}
)

template = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Just answers in Indonesian language.
Question: {question}
Context: {context}
Answer:
"""
prompt = ChatPromptTemplate.from_template(template)

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever= db.as_retriever(),
    memory=memory,
    combine_docs_chain_kwargs={"prompt": prompt},
)
qa.run("Apa itu data?")

I've tried several settings such as changing chunk size, embedding model, vector store, and changing llm. but the answer generated remains the same, not full. When I asked "What is data" The resulting answer is like this

This is an example of the resulting response

0

There are 0 best solutions below