# PDFExtractor / app.py
# Vela
# enhanced graph
# 75115cd
import streamlit as st
import os
from application.services import gemini_api_service, streamlit_function
from google.genai.errors import ClientError
from application.utils import logger
from application.schemas.response_schema import (
GEMINI_GHG_PARAMETERS, GEMINI_ENVIRONMENTAL_PARAMETERS_CSRD,
GEMINI_ENVIRONMENT_PARAMETERS, GEMINI_SOCIAL_PARAMETERS,
GEMINI_GOVERNANCE_PARAMETERS, GEMINI_MATERIALITY_PARAMETERS,
GEMINI_NET_ZERO_INTERVENTION_PARAMETERS, FULL_RESPONSE_SCHEMA
)
import test
# Rebinds the imported `logger` module name to the logger object it returns;
# from here on `logger` is the logger instance, not the module.
logger = logger.get_logger()

# Page configuration and introductory text.
streamlit_function.config_homepage()
st.title("Sustainability Report Analyzer")
st.write("Upload your sustainability report PDF and generate insights using different models.")

# Model identifiers. MODEL_1..MODEL_3 are the first three entries of MODEL and
# are the ones wired to the "Generate" buttons; every model uses the same API
# name.
MODEL = ["gemini-1.5-pro-latest", "gemini-2.0-flash", "gemini-1.5-flash", "gemini-2.5-exp"]
MODEL_1, MODEL_2, MODEL_3 = MODEL[:3]
API_1 = API_2 = API_3 = "gemini"

# NOTE(review): the rest of the script reads st.session_state.pdf_file rather
# than this return value -- presumably upload_file() also stores the upload
# under that session key; confirm in streamlit_function.
pdf_file = streamlit_function.upload_file("pdf", label="Upload Sustainability Report PDF")

# Make sure every session key read later exists, so lookups never raise on
# the first run. Existing values are left untouched.
for state_key in (
    f"{MODEL_1}_result",
    f"{MODEL_2}_result",
    f"{MODEL_3}_result",
    "pdf_file",
    "excel_file",
):
    if state_key not in st.session_state:
        st.session_state[state_key] = None
def _render_model_column(column, api_name, model_name, uploaded_pdf, report_name):
    """Render one model's "Generate" button and its stored result in *column*.

    When the button is pressed, the uploaded PDF is sent to
    ``gemini_api_service.extract_emissions_data_as_json`` with
    ``FULL_RESPONSE_SCHEMA``, the result is passed to
    ``streamlit_function.export_results_to_excel`` and then stored in
    ``st.session_state["<model_name>_result"]``. A stored result is shown as
    JSON below the button.

    Args:
        column: Streamlit column (context manager) to draw into.
        api_name: API identifier forwarded to the extraction service.
        model_name: Model identifier; also used for labels and the session key.
        uploaded_pdf: The uploaded PDF object forwarded to the service.
        report_name: Base name (no ``.pdf`` suffix) forwarded to the exporter.

    A ``ClientError`` from the Gemini client is reported in the UI and logged
    instead of propagating, so one failing model does not break the page.
    """
    result_key = f"{model_name}_result"
    with column:
        if st.button(f"Generate {model_name} Response"):
            try:
                with st.spinner(f"Calling {model_name}..."):
                    result = gemini_api_service.extract_emissions_data_as_json(
                        api_name, model_name, uploaded_pdf, FULL_RESPONSE_SCHEMA
                    )
                    streamlit_function.export_results_to_excel(result, model_name, report_name)
                    st.session_state[result_key] = result
            except ClientError as e:
                st.error(f"Gemini API Error: {e}")
                # The details are interpolated up front: passing them as extra
                # positional args without placeholders makes the logger fail
                # to format the record and the details are lost.
                logger.error(f"Error Details: {e.message} {e.response}")

        # Shown on every rerun, not only right after the button press.
        if st.session_state[result_key]:
            st.write(f"Extracted Metrics by {model_name}_result")
            st.json(st.session_state[result_key])


if st.session_state.pdf_file:
    # pdf_file is indexed as a sequence; only the first upload is processed.
    uploaded_pdf = st.session_state.pdf_file[0]
    file_name = uploaded_pdf.name.removesuffix(".pdf")

    with st.container():
        columns = st.columns([5, 5, 5], gap="small")
        model_slots = zip(
            columns,
            (API_1, API_2, API_3),
            (MODEL_1, MODEL_2, MODEL_3),
        )
        for column, api_name, model_name in model_slots:
            _render_model_column(column, api_name, model_name, uploaded_pdf, file_name)

    # Offer the workbook for download once it exists on disk.
    # NOTE(review): presumably this is the path export_results_to_excel writes
    # to -- confirm in streamlit_function.
    file_path = f"data/{file_name}.xlsx"
    if os.path.exists(file_path):
        with open(file_path, "rb") as file:
            st.download_button(
                label="Download Excel File",
                data=file,
                file_name=f"{file_name}.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            )