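# app.py (2.24 kB) -- commit 7155a9f (verified), "Create app.py",
# uploaded by Manishkumaryadav.
# NOTE(review): the "raw" / "history" / "blame" entries that appeared here were
# page navigation text carried over with the file contents, not code.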
import os
import streamlit as st
import fitz
import openai
import sqlite3
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pdfplumber
# Initialize once
@st.cache_resource
def init_system():
    """Build the search backends once and reuse them across Streamlit reruns.

    Runs ``process_pdf`` on ``Q1FY24.pdf``, loads the FAISS index stored under
    ``faiss_index`` and opens the ``metric_table.db`` SQLite database.

    Returns:
        tuple: ``(vector_store, conn)`` -- the loaded FAISS vector store and an
        open ``sqlite3.Connection``.
    """
    # 1. Process PDF
    process_pdf("Q1FY24.pdf")

    # 2. Load pre-processed data
    embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
    # NOTE(review): newer langchain releases may require
    # allow_dangerous_deserialization=True here -- confirm against the
    # installed version before adding it.
    vector_store = FAISS.load_local("faiss_index", embeddings)

    # 3. Connect SQL
    # The cached connection is handed to every script run, and those runs do
    # not all happen on the thread that created it. With sqlite3's default
    # check_same_thread=True the first reuse from another thread raises
    # ProgrammingError, so the check is disabled. Callers only read from this
    # connection; serialize access if writes are ever added.
    conn = sqlite3.connect('metric_table.db', check_same_thread=False)
    return vector_store, conn
def process_pdf(pdf_path):
    """Extract data from a PDF into SQLite and an on-disk FAISS index.

    Ensures the ``metric_table`` table exists in ``metric_table.db``, collects
    the text of every page, splits it into chunks and saves a FAISS index
    built from those chunks under ``faiss_index``.

    Args:
        pdf_path: Path of the PDF file to process.

    Raises:
        ValueError: If no text could be extracted from the PDF, because an
            index cannot be built from zero chunks.
    """
    page_texts = []

    # Structured Data
    conn = sqlite3.connect('metric_table.db')
    try:
        cursor = conn.cursor()
        cursor.execute('''CREATE TABLE IF NOT EXISTS metric_table
                     (metric TEXT, quarter TEXT, value REAL)''')

        # Unstructured Data
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                # extract_text() may return None for a page without a text
                # layer (depends on the pdfplumber version); normalise it so
                # the substring test below cannot raise TypeError.
                text = page.extract_text() or ""
                page_texts.append(text)

                # Structured extraction
                if "Financial Performance Summary" in text:
                    tables = page.extract_tables()  # noqa: F841
                    # TODO: insert the rows of `tables` into metric_table.
                    # The original left this step as a placeholder
                    # ("Add full processing logic from previous code").

        conn.commit()
    finally:
        # Always release the database handle, even if PDF parsing fails.
        conn.close()

    # Save vector store
    full_text = "\n".join(page_texts)
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
    chunks = splitter.split_text(full_text)
    if not chunks:
        raise ValueError(f"No text could be extracted from {pdf_path!r}")
    embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
    FAISS.from_texts(chunks, embeddings).save_local("faiss_index")
# Streamlit UI
def main():
    """Render the Streamlit page and answer the user's question.

    Questions containing "trend", "margin" or "growth" are answered from the
    ``metric_table`` SQL table; every other question is answered with the
    top similarity-search hit from the vector store.
    """
    st.title("Fundrev Financial Analyzer")

    # Initialize system
    vector_store, conn = init_system()

    query = st.text_input("Ask financial question:")
    if not query:
        return

    # Hybrid query logic
    lowered = query.lower()
    if any(keyword in lowered for keyword in ("trend", "margin", "growth")):
        cursor = conn.cursor()
        try:
            # Bind the user's text as a parameter instead of formatting it
            # into the statement: this prevents SQL injection and stops a
            # question containing a quote character from crashing the query.
            cursor.execute(
                "SELECT * FROM metric_table WHERE metric LIKE ?",
                (f"%{query}%",),
            )
            st.table(cursor.fetchall())
        finally:
            cursor.close()
    else:
        docs = vector_store.similarity_search(query)
        if docs:
            st.write(docs[0].page_content)
        else:
            # An empty result list would otherwise raise IndexError.
            st.info("No matching passages found.")
# Start the app only when this file is executed as the entry script.
if __name__ == "__main__":
    main()