I am currently working on a project where I use ChromaDB to store vector embeddings generated from textual data. The embeddings are created with LangChain's OpenAI embeddings integration. However, I can't find a meaningful way to visualize these embeddings.
Here is the relevant part of my code:
import os
import chromadb
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
import pypdf
import numpy as np
# Set the OpenAI API key. setdefault keeps a key that is already exported in
# the environment instead of overwriting it with the placeholder below.
os.environ.setdefault("OPENAI_API_KEY", "openai_api_key")

# Initialize the chat model and the embedding function
model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.5)
embeddings = OpenAIEmbeddings()

# Load PDF file and split into chunks
loader = PyPDFLoader("./file.pdf")
docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
splits = text_splitter.split_documents(docs)

# Embed the chunks and store them in a Chroma collection on disk
vector_store = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
    persist_directory="./chroma_db",
)
# NOTE(review): recent Chroma releases appear to persist automatically and
# deprecate this call; kept for older versions -- confirm against the
# installed chromadb / langchain-community versions.
vector_store.persist()
# Function for similarity search
def query_search(query, persist_directory="./chroma_db"):
    """Answer *query* from the persisted Chroma store and print the answer.

    Args:
        query: Question passed to the RetrievalQA chain.
        persist_directory: Directory of the persisted Chroma collection.
            Defaults to "./chroma_db", the directory this script indexes the
            PDF chunks into via ``Chroma.from_documents``.
    """
    # Open the same directory the documents were indexed into. Pointing at a
    # different path (previously "./files_db") queries a separate store that
    # does not contain the indexed chunks.
    vector_store_retriever = Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings,
    )

    # Retrieve the two most similar chunks for each query
    retriever = vector_store_retriever.as_retriever(search_kwargs={"k": 2})

    # "stuff" chain: the retrieved chunks are passed to the LLM in one prompt
    qa_chain = RetrievalQA.from_chain_type(
        llm=model,
        chain_type="stuff",
        retriever=retriever,
        verbose=True,
        return_source_documents=True,
    )

    response = qa_chain.invoke(query)
    print(response["result"])
# Example run: pass a placeholder question to the retrieval QA function,
# which prints the answer.
query = "Query"
query_search(query)
I have tried various methods to visualize these embeddings, but none seem to work effectively. Can anyone provide guidance on how to effectively visualize vector embeddings stored in ChromaDB? Any help or suggestions would be greatly appreciated.