Spaces:
Build error
Build error
| import os | |
| import streamlit as st | |
| import fitz | |
| import sqlite3 | |
| import pdfplumber | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_openai import OpenAIEmbeddings | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter # Now works | |
def init_system():
    """Ensure the data stores exist and return handles to them.

    Ingestion (``process_pdf``) is only run when the FAISS index directory
    or the SQLite database file is missing. Streamlit re-executes the whole
    script on every user interaction, so ingesting unconditionally would
    re-embed the PDF and insert another metric row on each rerun.
    Delete ``faiss_index`` or ``metric_table.db`` to force a rebuild.

    Returns:
        tuple: ``(vector_store, conn)`` where ``vector_store`` is the FAISS
        store loaded from ``faiss_index`` and ``conn`` is an open
        ``sqlite3`` connection to ``metric_table.db``. The caller owns the
        connection and is responsible for closing it.
    """
    if not (os.path.isdir("faiss_index") and os.path.isfile("metric_table.db")):
        process_pdf("Q1FY24.pdf")
    # Use the same embedding model that process_pdf used to build the index,
    # so query vectors live in the same space as the stored vectors.
    embeddings = OpenAIEmbeddings(
        openai_api_key=os.getenv("OPENAI_API_KEY"),
        model="text-embedding-ada-002"
    )
    # NOTE(review): newer langchain_community releases appear to require
    # allow_dangerous_deserialization=True here because the index metadata is
    # pickled; confirm against the pinned version before adding it, since
    # older releases reject the keyword.
    vector_store = FAISS.load_local("faiss_index", embeddings)
    conn = sqlite3.connect('metric_table.db')
    return vector_store, conn
def process_pdf(pdf_path):
    """Ingest one PDF into the metrics database and the FAISS index.

    Two artifacts are written in the working directory:

    * ``metric_table.db`` - SQLite table ``metric_table(metric, quarter,
      value)``; currently only a single example row is stored.
    * ``faiss_index`` - FAISS index built from 1000-character chunks of the
      PDF's text, embedded with ``text-embedding-ada-002``.

    Args:
        pdf_path: Path of the PDF file to read.

    Raises:
        ValueError: If no text could be extracted from the PDF, so there is
            nothing to embed.
    """
    # Structured Data Extraction
    conn = sqlite3.connect('metric_table.db')
    try:
        cursor = conn.cursor()
        cursor.execute('''CREATE TABLE IF NOT EXISTS metric_table
                          (metric TEXT, quarter TEXT, value REAL)''')
        # Example metric insertion (add full extraction logic).
        # Guarded with NOT EXISTS so repeated ingestion of the same report
        # does not pile up duplicate rows.
        cursor.execute(
            "INSERT INTO metric_table (metric, quarter, value) "
            "SELECT ?, ?, ? WHERE NOT EXISTS ("
            "SELECT 1 FROM metric_table WHERE metric = ? AND quarter = ?)",
            ('Revenue', 'Q1 FY24', 19.8, 'Revenue', 'Q1 FY24'),
        )
        conn.commit()
    finally:
        # The connection is local to this function; always release it.
        conn.close()

    # Unstructured Data Processing
    with fitz.open(pdf_path) as doc:
        full_text = "".join(page.get_text() for page in doc)

    # Text Chunking & Embedding
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
    chunks = splitter.split_text(full_text)
    if not chunks:
        # Building an index from zero texts fails deep inside the vector
        # store; report the real cause instead.
        raise ValueError(f"No extractable text found in {pdf_path!r}")
    embeddings = OpenAIEmbeddings(
        openai_api_key=os.getenv("OPENAI_API_KEY"),
        model="text-embedding-ada-002"
    )
    FAISS.from_texts(chunks, embeddings).save_local("faiss_index")
# Streamlit UI
def main():
    """Render the question box and answer from the matching data store.

    Questions containing "trend", "margin" or "revenue" are answered from
    the SQLite ``metric_table``; everything else is answered with the single
    closest text chunk from the FAISS vector store.
    """
    st.title("Fundrev Financial Analyzer")
    # Initialize system
    vector_store, conn = init_system()
    try:
        query = st.text_input("Ask financial question:")
        if not query:
            return
        # Structured data queries
        if any(kw in query.lower() for kw in ("trend", "margin", "revenue")):
            cursor = conn.cursor()
            # Bound parameters keep user text out of the SQL statement.
            # The second condition matches metrics whose name occurs inside
            # the question (e.g. "revenue trend" -> "Revenue"); matching only
            # the whole question against the metric name never hits for
            # natural-language input.
            cursor.execute(
                "SELECT * FROM metric_table "
                "WHERE metric LIKE '%' || ? || '%' "
                "OR ? LIKE '%' || metric || '%'",
                (query, query),
            )
            results = cursor.fetchall()
            if results:
                st.table(results)
            else:
                # st.table expects tabular data, so show the message as text.
                st.write("No matching metrics found")
        # Unstructured data queries
        else:
            docs = vector_store.similarity_search(query, k=1)
            st.write(docs[0].page_content if docs else "No relevant information found")
    finally:
        # A new connection is opened on every script run; release it.
        conn.close()


if __name__ == "__main__":
    main()