import os
import sqlite3

import streamlit as st
import fitz  # PyMuPDF, used for plain-text extraction
import pdfplumber  # used for table extraction (see the extract_metrics sketch below)
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

def init_system():
    # Build the index and metrics table once at startup (could be wrapped in
    # st.cache_resource to avoid re-embedding the PDF on every Streamlit rerun).
    process_pdf("Q1FY24.pdf")
    embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
    # Recent langchain-community releases require opting in to pickle loading
    # when reading a locally saved FAISS index.
    vector_store = FAISS.load_local(
        "faiss_index", embeddings, allow_dangerous_deserialization=True
    )
    conn = sqlite3.connect("metric_table.db")
    return vector_store, conn

def process_pdf(pdf_path):
    # --- Structured data extraction ---
    conn = sqlite3.connect("metric_table.db")
    cursor = conn.cursor()
    cursor.execute(
        """CREATE TABLE IF NOT EXISTS metric_table
           (metric TEXT, quarter TEXT, value REAL)"""
    )
    # Example metric insertion; full extraction logic could use pdfplumber
    # (see the extract_metrics sketch below).
    cursor.execute("INSERT INTO metric_table VALUES ('Revenue', 'Q1 FY24', 19.8)")
    conn.commit()
    conn.close()

    # --- Unstructured data processing ---
    full_text = ""
    with fitz.open(pdf_path) as doc:
        for page in doc:
            full_text += page.get_text()

    # --- Text chunking & embedding ---
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
    chunks = splitter.split_text(full_text)
    embeddings = OpenAIEmbeddings(
        openai_api_key=os.getenv("OPENAI_API_KEY"),
        model="text-embedding-ada-002",
    )
    FAISS.from_texts(chunks, embeddings).save_local("faiss_index")
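

# A minimal sketch of table-based metric extraction with pdfplumber (imported
# above but unused by the hard-coded example row). It assumes the deck contains
# simple tables whose rows look like "Metric | Value"; the function name, the
# quarter default, and the cell-cleanup rules are illustrative assumptions,
# not part of the original app.
def extract_metrics(pdf_path, quarter="Q1 FY24"):
    rows = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            for table in page.extract_tables():
                for row in table:
                    # Keep only rows shaped like (label, numeric value).
                    if not row or len(row) < 2 or not row[0] or not row[1]:
                        continue
                    value_text = row[1].replace(",", "").replace("$", "").strip()
                    try:
                        value = float(value_text)
                    except ValueError:
                        continue
                    rows.append((row[0].strip(), quarter, value))
    # Rows could then be bulk-inserted with
    # cursor.executemany("INSERT INTO metric_table VALUES (?, ?, ?)", rows)
    return rows
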
# --- Streamlit UI ---
def main():
    st.title("Fundrev Financial Analyzer")

    # Initialize the vector store and the metrics database
    vector_store, conn = init_system()

    query = st.text_input("Ask financial question:")
    if query:
        keywords = ["trend", "margin", "revenue"]
        matched = [kw for kw in keywords if kw in query.lower()]
        # Structured data queries go to SQLite
        if matched:
            cursor = conn.cursor()
            # Parameterized LIKE avoids SQL injection and matches on the
            # detected keyword rather than the full question text.
            cursor.execute(
                "SELECT metric, quarter, value FROM metric_table WHERE metric LIKE ?",
                (f"%{matched[0]}%",),
            )
            results = cursor.fetchall()
            if results:
                st.table(results)
            else:
                st.write("No matching metrics found")
        # Unstructured data queries go to the vector store
        else:
            docs = vector_store.similarity_search(query, k=1)
            st.write(docs[0].page_content if docs else "No relevant information found")

if __name__ == "__main__":
    main()