# Manishkumaryadav's picture
# Update app.py
# da39e0a verified
import os
import streamlit as st
import fitz
import sqlite3
import pdfplumber
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter # Now works
@st.cache_resource
def init_system():
    """Build the data stores once and return handles to them.

    Calls ``process_pdf("Q1FY24.pdf")`` to (re)create the SQLite metric table
    and the on-disk FAISS index, then loads the index and opens a connection
    to ``metric_table.db``.

    The result is cached with ``st.cache_resource``, so the same objects are
    handed to every rerun and every session of the app.

    Returns:
        A ``(vector_store, conn)`` tuple: the loaded FAISS vector store and
        an open ``sqlite3.Connection`` to ``metric_table.db``.
    """
    process_pdf("Q1FY24.pdf")

    embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))

    # NOTE: FAISS.load_local unpickles the stored docstore. Recent
    # langchain-community releases refuse to do so unless the caller opts in.
    # Opting in is acceptable here only because the index was written by
    # process_pdf() above from a local file; never load an index obtained
    # from an untrusted source this way.
    vector_store = FAISS.load_local(
        "faiss_index",
        embeddings,
        allow_dangerous_deserialization=True,
    )

    # The cached connection is reused from whichever thread Streamlit runs a
    # session on; sqlite3's default same-thread check would raise
    # ProgrammingError as soon as a different thread touches it.
    conn = sqlite3.connect('metric_table.db', check_same_thread=False)
    return vector_store, conn
def process_pdf(pdf_path: str) -> None:
    """Ingest a PDF into the metric database and the on-disk FAISS index.

    Two independent steps are performed:

    1. Structured data: ensure ``metric_table`` exists in ``metric_table.db``
       and store the example ``Revenue`` / ``Q1 FY24`` row (placeholder until
       real extraction logic is added).
    2. Unstructured data: read the text of every page with PyMuPDF, split it
       into chunks of at most 1000 characters, embed the chunks with OpenAI
       and save the resulting FAISS index to the ``faiss_index`` directory.

    Args:
        pdf_path: Path of the PDF file to process.

    Raises:
        ValueError: If no text could be extracted from the PDF, so there is
            nothing to index.
    """
    # Structured data extraction
    conn = sqlite3.connect('metric_table.db')
    try:
        cursor = conn.cursor()
        cursor.execute(
            "CREATE TABLE IF NOT EXISTS metric_table "
            "(metric TEXT, quarter TEXT, value REAL)"
        )
        # Example metric insertion (add full extraction logic).
        # Guarded by NOT EXISTS so that re-running the ingestion (app restart,
        # cache clear) does not pile up duplicate rows.
        cursor.execute(
            "INSERT INTO metric_table (metric, quarter, value) "
            "SELECT ?, ?, ? WHERE NOT EXISTS ("
            "SELECT 1 FROM metric_table WHERE metric = ? AND quarter = ?)",
            ("Revenue", "Q1 FY24", 19.8, "Revenue", "Q1 FY24"),
        )
        conn.commit()
    finally:
        # Always release the database handle, even if a statement fails.
        conn.close()

    # Unstructured data processing
    with fitz.open(pdf_path) as doc:
        full_text = "".join(page.get_text() for page in doc)

    # Text chunking & embedding
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
    chunks = splitter.split_text(full_text)
    if not chunks:
        # An index cannot be built from zero texts; fail with a clear message
        # instead of an obscure error from deep inside the vector store.
        raise ValueError(f"No extractable text found in {pdf_path!r}")

    embeddings = OpenAIEmbeddings(
        openai_api_key=os.getenv("OPENAI_API_KEY"),
        model="text-embedding-ada-002",
    )
    FAISS.from_texts(chunks, embeddings).save_local("faiss_index")
# Streamlit UI
def main() -> None:
    """Render the Streamlit page and answer a single user question.

    Questions mentioning one of the metric keywords ("trend", "margin",
    "revenue") are answered from ``metric_table`` in SQLite; every other
    question is answered with the single most similar chunk from the FAISS
    vector store.
    """
    st.title("Fundrev Financial Analyzer")

    # Initialize system
    vector_store, conn = init_system()

    query = st.text_input("Ask financial question:")
    if not query:
        return

    metric_keywords = ("trend", "margin", "revenue")
    lowered = query.lower()
    matched = [kw for kw in metric_keywords if kw in lowered]

    if matched:
        # Structured data queries.
        # Search by the recognised keywords rather than the whole question:
        # a sentence such as "what is the revenue trend" is never a substring
        # of a metric name. Values are bound as parameters so user input can
        # never alter the SQL statement.
        where_clause = " OR ".join("metric LIKE ?" for _ in matched)
        patterns = ["%" + kw + "%" for kw in matched]
        cursor = conn.cursor()
        cursor.execute(
            "SELECT * FROM metric_table WHERE " + where_clause,
            patterns,
        )
        results = cursor.fetchall()
        if results:
            st.table(results)
        else:
            # st.table expects tabular data, so show the message as text.
            st.write("No matching metrics found")
    else:
        # Unstructured data queries
        docs = vector_store.similarity_search(query, k=1)
        st.write(docs[0].page_content if docs else "No relevant information found")


# Run the app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()