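"""Streamlit app: chat with an uploaded CSV file two ways, via a LangChain
RAG chain and a LlamaIndex query engine, both backed by FAISS vector stores."""
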
import streamlit as st
import pandas as pd
import os
from dotenv import load_dotenv
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
from llama_index.readers.file.paged_csv.base import PagedCSVReader
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.core.ingestion import IngestionPipeline
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS as LangChainFAISS
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
import faiss

# Load environment variables from a local .env file, if present
load_dotenv()
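# Assumption: a .env file in the working directory provides the key, e.g.:
#   OPENAI_API_KEY=sk-...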

# Global settings for LlamaIndex
EMBED_DIMENSION = 512
Settings.llm = OpenAI(model="gpt-3.5-turbo")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small", dimensions=EMBED_DIMENSION)
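# Note: the faiss.IndexFlatL2 indexes created below must use this same
# dimensionality, so the LangChain OpenAIEmbeddings call is likewise pinned
# to text-embedding-3-small with dimensions=EMBED_DIMENSION.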

# Streamlit app
st.title("Chat with CSV Files - LangChain vs. LlamaIndex")

# File uploader
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])

if uploaded_file:
    # Save the uploaded file temporarily
    temp_file_path = f"temp_{uploaded_file.name}"
    with open(temp_file_path, "wb") as temp_file:
        temp_file.write(uploaded_file.getbuffer())

    # Read and preview the CSV data
    data = pd.read_csv(temp_file_path)
    st.write("Preview of uploaded data:")
    st.dataframe(data)

    # One tab per framework
    tab1, tab2 = st.tabs(["Chat with CSV using LangChain", "Chat with CSV using LlamaIndex"])

    # LangChain tab
    with tab1:
        st.subheader("LangChain Query")
        loader = CSVLoader(file_path=temp_file_path)
        docs = loader.load_and_split()

        # Preview the first document chunk
        if docs:
            st.write("Preview of a document chunk (LangChain):")
            st.text(docs[0].page_content)

        # LangChain FAISS vector store. The docstore and id map are required
        # by the FAISS wrapper, and the embedding model is pinned so its
        # output dimension matches the IndexFlatL2 index.
        langchain_index = faiss.IndexFlatL2(EMBED_DIMENSION)
        langchain_vector_store = LangChainFAISS(
            embedding_function=OpenAIEmbeddings(
                model="text-embedding-3-small", dimensions=EMBED_DIMENSION
            ),
            index=langchain_index,
            docstore=InMemoryDocstore(),
            index_to_docstore_id={},
        )
        langchain_vector_store.add_documents(docs)

        # LangChain retrieval chain
        retriever = langchain_vector_store.as_retriever()
        system_prompt = (
            "You are an assistant for question-answering tasks. "
            "Use the following pieces of retrieved context to answer "
            "the question. If you don't know the answer, say that you "
            "don't know. Use three sentences maximum and keep the "
            "answer concise.\n\n{context}"
        )
        prompt = ChatPromptTemplate.from_messages(
            [("system", system_prompt), ("human", "{input}")]
        )
        question_answer_chain = create_stuff_documents_chain(ChatOpenAI(), prompt)
        langchain_rag_chain = create_retrieval_chain(retriever, question_answer_chain)

        # Query input for LangChain
        query = st.text_input("Ask a question about your data (LangChain):")
        if query:
            answer = langchain_rag_chain.invoke({"input": query})
            st.write(f"Answer: {answer['answer']}")

    # LlamaIndex tab
    with tab2:
        st.subheader("LlamaIndex Query")
        csv_reader = PagedCSVReader()
        reader = SimpleDirectoryReader(
            input_files=[temp_file_path],
            file_extractor={".csv": csv_reader},
        )
        docs = reader.load_data()

        # Preview the first document chunk
        if docs:
            st.write("Preview of a document chunk (LlamaIndex):")
            st.text(docs[0].text)

        # Initialize a FAISS vector store
        llama_faiss_index = faiss.IndexFlatL2(EMBED_DIMENSION)
        llama_vector_store = FaissVectorStore(faiss_index=llama_faiss_index)

        # Run the ingestion pipeline to turn the documents into nodes
        pipeline = IngestionPipeline(vector_store=llama_vector_store, documents=docs)
        nodes = pipeline.run()

        # Build an index over the nodes and create a query engine
        llama_index = VectorStoreIndex(nodes)
        query_engine = llama_index.as_query_engine(similarity_top_k=3)

        # Query input for LlamaIndex
        query = st.text_input("Ask a question about your data (LlamaIndex):")
        if query:
            response = query_engine.query(query)
            st.write(f"Answer: {response.response}")

    # Clean up the temporary file
    os.remove(temp_file_path)
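
# Assuming this file is saved as app.py (the filename is not given in the
# source), launch the app with:
#   streamlit run app.py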