Spaces:
Build error
Build error
File size: 2,268 Bytes
7155a9f c45a056 da39e0a 0a07fd2 7155a9f da39e0a 7155a9f da39e0a 7155a9f c45a056 7155a9f c45a056 7155a9f c45a056 7155a9f c45a056 7155a9f 0a07fd2 7155a9f c45a056 7155a9f c45a056 7155a9f c45a056 7155a9f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import os
import streamlit as st
import fitz
import sqlite3
import pdfplumber
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter # Now works
@st.cache_resource
def init_system():
    """Build the search index and open the metrics database.

    Runs ``process_pdf`` on ``Q1FY24.pdf`` (which writes ``metric_table.db``
    and the ``faiss_index`` directory), then loads the saved index and opens
    a connection to the database. The function is wrapped in
    ``st.cache_resource``, so the returned objects are reused across reruns
    instead of being rebuilt on every interaction.

    Returns:
        tuple: ``(vector_store, conn)`` -- the loaded FAISS vector store and
        an open ``sqlite3`` connection to ``metric_table.db``.
    """
    process_pdf("Q1FY24.pdf")
    embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
    # The saved index is pickle-backed, and current langchain_community
    # releases refuse to load it unless this flag is set. Opting in is
    # acceptable here because process_pdf() wrote the index just above.
    vector_store = FAISS.load_local(
        "faiss_index",
        embeddings,
        allow_dangerous_deserialization=True,
    )
    # The cached connection is handed to whichever thread runs the script
    # next; sqlite3's default same-thread check would raise ProgrammingError
    # on the first use from a different thread.
    conn = sqlite3.connect('metric_table.db', check_same_thread=False)
    return vector_store, conn
def process_pdf(pdf_path):
    """Populate the metrics table and build the FAISS index from one PDF.

    Side effects:
        * Creates ``metric_table`` in ``metric_table.db`` if it does not
          exist and stores the example revenue row (once, not once per call).
        * Extracts the PDF's text, splits it with ``chunk_size=1000``, embeds
          the chunks and saves the index to the ``faiss_index`` directory.

    Args:
        pdf_path: Path of the PDF to read.

    Raises:
        ValueError: If no text could be extracted from the PDF.
    """
    # Structured Data Extraction
    conn = sqlite3.connect('metric_table.db')
    try:
        cursor = conn.cursor()
        cursor.execute('''CREATE TABLE IF NOT EXISTS metric_table
                     (metric TEXT, quarter TEXT, value REAL)''')
        # Example metric insertion (add full extraction logic)
        # Guarded with NOT EXISTS so that re-running the function (e.g. on
        # every app restart) does not pile up duplicate rows.
        cursor.execute(
            """INSERT INTO metric_table (metric, quarter, value)
               SELECT ?, ?, ?
               WHERE NOT EXISTS (
                   SELECT 1 FROM metric_table
                   WHERE metric = ? AND quarter = ?
               )""",
            ('Revenue', 'Q1 FY24', 19.8, 'Revenue', 'Q1 FY24'),
        )
        conn.commit()
    finally:
        # This connection is private to the function; release the file
        # handle even if one of the statements above fails.
        conn.close()

    # Unstructured Data Processing
    with fitz.open(pdf_path) as doc:
        full_text = "".join(page.get_text() for page in doc)

    # Text Chunking & Embedding
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
    chunks = splitter.split_text(full_text)
    if not chunks:
        # An empty chunk list cannot be turned into an index; fail with a
        # message that points at the actual cause (e.g. a scanned PDF).
        raise ValueError(f"No extractable text found in {pdf_path!r}")
    embeddings = OpenAIEmbeddings(
        openai_api_key=os.getenv("OPENAI_API_KEY"),
        model="text-embedding-ada-002"
    )
    FAISS.from_texts(chunks, embeddings).save_local("faiss_index")
# Streamlit UI
def main():
    """Render the Streamlit page and answer the question typed by the user.

    Questions containing "trend", "margin" or "revenue" (case-insensitive)
    are looked up in ``metric_table``; every other question is answered with
    the single closest chunk from the FAISS index. Nothing is shown until
    the text box is non-empty.
    """
    st.title("Fundrev Financial Analyzer")
    # Initialize system
    vector_store, conn = init_system()
    query = st.text_input("Ask financial question:")
    if not query:
        return

    # Structured data queries
    if any(kw in query.lower() for kw in ["trend", "margin", "revenue"]):
        cursor = conn.cursor()
        # Bind the user's text as a parameter instead of formatting it into
        # the statement: a quote in the question would otherwise break the
        # SQL, and crafted input could rewrite it.
        # NOTE(review): this matches only when the metric name contains the
        # whole question, so full-sentence questions will likely return
        # nothing -- confirm whether keyword-based matching is intended.
        cursor.execute(
            "SELECT * FROM metric_table WHERE metric LIKE ?",
            (f"%{query}%",),
        )
        results = cursor.fetchall()
        if results:
            st.table(results)
        else:
            # st.table expects tabular data, so the fallback message is
            # rendered as ordinary text.
            st.write("No matching metrics found")
    # Unstructured data queries
    else:
        docs = vector_store.similarity_search(query, k=1)
        st.write(docs[0].page_content if docs else "No relevant information found")
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()