File size: 2,238 Bytes
7155a9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import streamlit as st
import fitz
import openai
import sqlite3
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pdfplumber

# Initialize once
@st.cache_resource
def init_system():
    """Prepare the search index and database handle used by the UI.

    Runs ``process_pdf`` on ``Q1FY24.pdf``, loads the FAISS index stored
    under ``faiss_index`` and opens ``metric_table.db``. The function is
    wrapped in ``st.cache_resource``, so the returned objects are created
    once and then reused by subsequent calls.

    Returns:
        tuple: ``(vector_store, conn)`` - the loaded FAISS vector store and
        an open ``sqlite3`` connection to ``metric_table.db``.
    """
    # 1. Process PDF
    process_pdf("Q1FY24.pdf")

    # 2. Load pre-processed data
    embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
    vector_store = FAISS.load_local("faiss_index", embeddings)

    # 3. Connect SQL
    # The cached connection outlives the call that created it and can be
    # handed to a script run executing on another thread. sqlite3 rejects
    # cross-thread use unless check_same_thread is disabled.
    # NOTE(review): callers only read through this connection today; add
    # locking if writes through the shared handle are ever introduced.
    conn = sqlite3.connect('metric_table.db', check_same_thread=False)
    return vector_store, conn

def process_pdf(pdf_path):
    """Extract data from a PDF and persist the artifacts the app loads later.

    Ensures the ``metric_table`` table exists in ``metric_table.db``, collects
    the document text, splits it into chunks of up to 1000 characters and
    saves a FAISS index of the chunk embeddings to ``faiss_index``.

    Args:
        pdf_path: Path of the PDF file to process.

    Raises:
        ValueError: If no text could be extracted from the PDF, because an
            index cannot be built from zero chunks.
    """
    # Structured Data
    conn = sqlite3.connect('metric_table.db')
    try:
        cursor = conn.cursor()
        cursor.execute('''CREATE TABLE IF NOT EXISTS metric_table 
                    (metric TEXT, quarter TEXT, value REAL)''')

        # Unstructured Data
        # The context manager closes the document handle once the text of
        # every page has been collected.
        with fitz.open(pdf_path) as doc:
            full_text = "\n".join(page.get_text() for page in doc)

        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                # Structured extraction
                # extract_text() may yield None for a page without a text
                # layer (depends on the pdfplumber version), so normalise it
                # before the substring test.
                page_text = page.extract_text() or ""
                if "Financial Performance Summary" in page_text:
                    tables = page.extract_tables()  # not stored yet, see TODO
                    # TODO: insert rows from `tables` into metric_table; the
                    # column layout of the extracted tables is not defined in
                    # this file.

        conn.commit()
    finally:
        # Always release the database handle, even when extraction fails.
        conn.close()

    # Save vector store
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
    chunks = splitter.split_text(full_text)
    if not chunks:
        raise ValueError(
            f"No extractable text found in {pdf_path!r}; cannot build the FAISS index"
        )
    embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
    FAISS.from_texts(chunks, embeddings).save_local("faiss_index")

# Streamlit UI
def main():
    """Render the Streamlit page and answer one free-text question.

    Questions containing "trend", "margin" or "growth" (case-insensitive)
    are answered with rows from ``metric_table`` whose ``metric`` column
    contains the question text. Any other question is answered with the
    content of the top similarity-search hit from the vector store.
    """
    st.title("Fundrev Financial Analyzer")

    # Initialize system
    vector_store, conn = init_system()

    query = st.text_input("Ask financial question:")
    if not query:
        return

    # Hybrid query logic
    lowered = query.lower()
    if any(keyword in lowered for keyword in ("trend", "margin", "growth")):
        cursor = conn.cursor()
        # Bind the user text as a parameter: interpolating it into the SQL
        # string allowed injection and failed on any question containing
        # a single quote.
        cursor.execute(
            "SELECT * FROM metric_table WHERE metric LIKE ?",
            (f"%{query}%",),
        )
        st.table(cursor.fetchall())
        return

    docs = vector_store.similarity_search(query)
    if docs:
        st.write(docs[0].page_content)
    else:
        # An empty result list previously raised IndexError on docs[0].
        st.warning("No matching passages found.")

# Start the UI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()