import os
import streamlit as st
import fitz
import openai
import sqlite3
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pdfplumber
# Initialize once per Streamlit session
@st.cache_resource
def init_system():
    # 1. Process the PDF into a FAISS index and a SQLite metrics table
    process_pdf("Q1FY24.pdf")

    # 2. Load the pre-processed vector store
    embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
    vector_store = FAISS.load_local("faiss_index", embeddings)

    # 3. Connect to the SQL metrics table; the cached connection is reused across
    #    Streamlit script-run threads, so allow cross-thread access
    conn = sqlite3.connect('metric_table.db', check_same_thread=False)
    return vector_store, conn
def process_pdf(pdf_path):
    # Structured data: create the metrics table
    conn = sqlite3.connect('metric_table.db')
    cursor = conn.cursor()
    cursor.execute('''CREATE TABLE IF NOT EXISTS metric_table
                      (metric TEXT, quarter TEXT, value REAL)''')

    # Unstructured data: extract the full text with PyMuPDF
    full_text = ""
    doc = fitz.open(pdf_path)
    for page in doc:
        full_text += page.get_text()
    doc.close()

    # Structured extraction: pull tables from the summary page with pdfplumber
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            if "Financial Performance Summary" in (page.extract_text() or ""):
                tables = page.extract_tables()
                # Add rows to SQL (one possible mapping is sketched in
                # insert_summary_tables below)
                # ... (Add full processing logic from previous code)

    conn.commit()
    conn.close()

    # Chunk the text and save the vector store
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
    chunks = splitter.split_text(full_text)
    embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
    FAISS.from_texts(chunks, embeddings).save_local("faiss_index")
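# Hedged sketch (not part of the original file): one plausible way to turn the
# pdfplumber tables from the "Financial Performance Summary" page into
# (metric, quarter, value) rows. It assumes a header row of quarter labels
# followed by one row per metric; adjust to the real table layout before use.
def insert_summary_tables(cursor, tables):
    for table in tables:
        if not table or len(table) < 2:
            continue
        quarters = table[0][1:]            # header row, e.g. ["Metric", "Q1FY24", ...]
        for row in table[1:]:
            metric, cells = row[0], row[1:]
            for quarter, raw in zip(quarters, cells):
                try:
                    value = float(str(raw).replace(",", "").replace("%", ""))
                except (TypeError, ValueError):
                    continue               # skip non-numeric cells
                cursor.execute(
                    "INSERT INTO metric_table (metric, quarter, value) VALUES (?, ?, ?)",
                    (metric, quarter, value),
                )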
# Streamlit UI
def main():
    st.title("Fundrev Financial Analyzer")

    # Initialize system (cached across reruns)
    vector_store, conn = init_system()

    query = st.text_input("Ask a financial question:")
    if query:
        # Hybrid query logic: metric-style questions go to SQL, everything else
        # to vector-store retrieval
        if any(keyword in query.lower() for keyword in ["trend", "margin", "growth"]):
            cursor = conn.cursor()
            # Parameterized LIKE query instead of string formatting (avoids SQL injection)
            cursor.execute("SELECT * FROM metric_table WHERE metric LIKE ?", (f"%{query}%",))
            st.table(cursor.fetchall())
        else:
            docs = vector_store.similarity_search(query)
            if docs:
                st.write(docs[0].page_content)
            else:
                st.write("No relevant passages found.")

if __name__ == "__main__":
    main()
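# Hedged usage note (assumptions, not from the original Space): with this file
# saved as app.py, the expected dependencies installed (streamlit, langchain,
# openai, faiss-cpu, pymupdf, pdfplumber), Q1FY24.pdf present in the working
# directory, and OPENAI_API_KEY set in the environment, the app is launched with:
#     streamlit run app.py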