Manishkumaryadav committed on
Commit
7155a9f
·
verified ·
1 Parent(s): 7fc6368

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ import fitz
4
+ import openai
5
+ import sqlite3
6
+ from langchain.embeddings import OpenAIEmbeddings
7
+ from langchain.vectorstores import FAISS
8
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
9
+ import pdfplumber
10
+
11
+ # Initialize once
12
@st.cache_resource
def init_system():
    """Build the search index and open the metrics database.

    Runs ``process_pdf`` on ``Q1FY24.pdf``, loads the FAISS index stored
    under ``faiss_index`` and opens ``metric_table.db``. The function is
    wrapped in ``st.cache_resource``, so the returned objects are created
    once and then reused.

    Returns:
        tuple: ``(vector_store, conn)`` - the loaded FAISS vector store and
        an open ``sqlite3`` connection to ``metric_table.db``.
    """
    # 1. Process PDF
    process_pdf("Q1FY24.pdf")

    # 2. Load pre-processed data
    embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
    vector_store = FAISS.load_local("faiss_index", embeddings)

    # 3. Connect SQL
    # The cached connection is shared with whichever thread later runs the
    # script. sqlite3 rejects cross-thread use by default, so that check is
    # disabled here. Callers that write through this connection must
    # serialize access themselves.
    conn = sqlite3.connect('metric_table.db', check_same_thread=False)
    return vector_store, conn
24
+
25
def process_pdf(pdf_path):
    """Extract data from a PDF into SQLite and a FAISS index.

    Ensures ``metric_table`` exists in ``metric_table.db``, collects the
    text of every page, splits it into chunks, embeds the chunks with
    OpenAI embeddings and saves the resulting index to ``faiss_index``.

    Args:
        pdf_path: Path of the PDF file to process.

    Raises:
        ValueError: If no text could be extracted from the PDF, because an
            index cannot be built from zero chunks.
    """
    # Structured Data
    conn = sqlite3.connect('metric_table.db')
    try:
        cursor = conn.cursor()
        cursor.execute('''CREATE TABLE IF NOT EXISTS metric_table
                     (metric TEXT, quarter TEXT, value REAL)''')

        # Unstructured Data
        page_texts = []
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                # extract_text() can return None for pages without a text
                # layer; normalise so the checks below never see None.
                text = page.extract_text() or ""
                page_texts.append(text)

                # Structured extraction
                if "Financial Performance Summary" in text:
                    tables = page.extract_tables()
                    # TODO: insert rows from `tables` into metric_table via
                    # `cursor`; the table layout is not defined in this file.

        conn.commit()
    finally:
        # Always release the database handle, even if PDF parsing fails.
        conn.close()

    full_text = "\n".join(page_texts)

    # Save vector store
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
    chunks = splitter.split_text(full_text)
    if not chunks:
        raise ValueError(
            f"No extractable text found in {pdf_path!r}; "
            "cannot build the FAISS index"
        )
    embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
    FAISS.from_texts(chunks, embeddings).save_local("faiss_index")
50
+
51
+ # Streamlit UI
52
def main():
    """Render the Streamlit page and answer one question per run.

    Questions containing "trend", "margin" or "growth" are answered from
    ``metric_table`` in SQLite; every other question is answered with the
    top similarity-search hit from the vector store.
    """
    st.title("Fundrev Financial Analyzer")

    # Initialize system
    vector_store, conn = init_system()

    query = st.text_input("Ask financial question:")
    if not query:
        return

    # Hybrid query logic
    lowered = query.lower()
    if any(keyword in lowered for keyword in ("trend", "margin", "growth")):
        cursor = conn.cursor()
        # Bind the user's text as a parameter instead of formatting it into
        # the SQL string, so quotes in the input cannot alter the statement.
        cursor.execute(
            "SELECT * FROM metric_table WHERE metric LIKE ?",
            (f"%{query}%",),
        )
        st.table(cursor.fetchall())
    else:
        docs = vector_store.similarity_search(query)
        if docs:
            st.write(docs[0].page_content)
        else:
            # An empty result would otherwise raise IndexError on docs[0].
            st.info("No matching content found.")
69
+
70
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()