Spaces:
Build error
import os
import streamlit as st
import fitz  # PyMuPDF
import openai
import sqlite3
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pdfplumber
# Initialize once
def init_system():
    # 1. Process PDF
    process_pdf("Q1FY24.pdf")

    # 2. Load pre-processed data
    embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
    vector_store = FAISS.load_local("faiss_index", embeddings)

    # 3. Connect SQL
    conn = sqlite3.connect('metric_table.db')
    return vector_store, conn
def process_pdf(pdf_path):
    # Structured data: create the metrics table if it does not exist yet
    conn = sqlite3.connect('metric_table.db')
    cursor = conn.cursor()
    cursor.execute('''CREATE TABLE IF NOT EXISTS metric_table
                      (metric TEXT, quarter TEXT, value REAL)''')
    conn.commit()

    # Unstructured data: collect the full page text for the vector store
    full_text = ""
    doc = fitz.open(pdf_path)
    for page in doc:
        full_text += page.get_text()

    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            # Structured extraction
            page_text = page.extract_text() or ""
            if "Financial Performance Summary" in page_text:
                tables = page.extract_tables()
                # Add to SQL (example)
                # ... (Add full processing logic from previous code)

    # Save vector store
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
    chunks = splitter.split_text(full_text)
    embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
    FAISS.from_texts(chunks, embeddings).save_local("faiss_index")
    conn.close()
# Streamlit UI
def main():
    st.title("Fundrev Financial Analyzer")

    # Initialize system
    vector_store, conn = init_system()

    query = st.text_input("Ask financial question:")
    if query:
        # Hybrid query logic: metric-style questions go to SQL, everything else to the vector store
        if any(keyword in query.lower() for keyword in ["trend", "margin", "growth"]):
            cursor = conn.cursor()
            # Parameterized query avoids SQL injection from user input
            cursor.execute("SELECT * FROM metric_table WHERE metric LIKE ?", (f"%{query}%",))
            st.table(cursor.fetchall())
        else:
            docs = vector_store.similarity_search(query)
            if docs:
                st.write(docs[0].page_content)
            else:
                st.write("No relevant passages found.")
if __name__ == "__main__":
    main()
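
The SQL branch in main() only returns rows once metric_table has actually been populated; that step is left out of process_pdf above ("Add full processing logic from previous code"). As an illustration only, and not the original processing logic, a minimal sketch of writing rows from page.extract_tables() into metric_table might look like the helper below. The row layout (metric name in the first cell, value in the last) and the hardcoded quarter label are assumptions.

# Illustrative sketch only -- the real row layout depends on the PDF's
# "Financial Performance Summary" table, which is not shown here.
def insert_metric_rows(conn, tables, quarter="Q1FY24"):
    cursor = conn.cursor()
    for table in tables:              # tables as returned by page.extract_tables()
        for row in table[1:]:         # skip the header row (assumption)
            if not row or len(row) < 2:
                continue
            metric, raw_value = row[0], row[-1]
            try:
                value = float(str(raw_value).replace(",", "").replace("%", ""))
            except (TypeError, ValueError):
                continue              # skip rows without a numeric value
            cursor.execute(
                "INSERT INTO metric_table (metric, quarter, value) VALUES (?, ?, ?)",
                (metric, quarter, value),
            )
    conn.commit()

If something like this is used, it would be called inside process_pdf right after extract_tables(), passing the already-open conn.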