Vela
committed on
Commit
·
2692728
1
Parent(s):
5d4ad83
removed extraction tool
Browse files
- .gitignore +2 -1
- app.py +86 -119
- pages/database.py +0 -92
- src/utils/__pycache__/common_functions.cpython-313.pyc +0 -0
- src/utils/__pycache__/streamlit_function.cpython-313.pyc +0 -0
.gitignore
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
.venv
|
2 |
logs
|
3 |
-
.env
|
|
|
|
1 |
.venv
|
2 |
logs
|
3 |
+
.env
|
4 |
+
src/utils/__pycache__/
|
app.py
CHANGED
@@ -1,125 +1,92 @@
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
-
|
4 |
from src.utils import streamlit_function
|
5 |
-
from src.utils import
|
|
|
|
|
6 |
|
7 |
-
logger =
|
8 |
streamlit_function.config_homepage()
|
9 |
|
10 |
-
st.title("
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
# # for i, col in enumerate(columns):
|
92 |
-
# # if i < len(st.session_state.uploaded_files):
|
93 |
-
# # pdf_file = st.session_state.uploaded_files[i]
|
94 |
-
# # file_name = pdf_file.name.removesuffix(".pdf")
|
95 |
-
# # result_key = f"{MODEL}_result_file_{i+1}"
|
96 |
-
|
97 |
-
# # with col:
|
98 |
-
# # st.write(f"**File {i+1}:** `{pdf_file.name}`")
|
99 |
-
# # if st.button(f"Extract Data from File {i+1}", key=f"extract_btn_{i}"):
|
100 |
-
# # with st.spinner(f"Extracting data from File {i+1} using {MODEL}..."):
|
101 |
-
# # for schema in response_schema:
|
102 |
-
# # result = gemini_model.extract_emissions_data_as_json(API_1, MODEL, pdf_file, schema)
|
103 |
-
# # if schema == GEMINI_GHG_PARAMETERS:
|
104 |
-
# # column = "Greenhouse Gas (GHG) Protocol Parameters"
|
105 |
-
# # elif schema == GEMINI_ENVIRONMENTAL_PARAMETERS_CSRD:
|
106 |
-
# # column = "Environmental Parameters (CSRD)"
|
107 |
-
# # elif schema == GEMINI_ENVIRONMENT_PARAMETERS:
|
108 |
-
# # column = "Environmental Parameters"
|
109 |
-
# # elif schema == GEMINI_SOCIAL_PARAMETERS:
|
110 |
-
# # column = "Social Parameters"
|
111 |
-
# # elif schema == GEMINI_GOVERNANCE_PARAMETERS:
|
112 |
-
# # column = "Governance Parameters"
|
113 |
-
# # elif schema == GEMINI_MATERIALITY_PARAMETERS:
|
114 |
-
# # column = "Materiality Parameters"
|
115 |
-
# # elif schema == GEMINI_NET_ZERO_INTERVENTION_PARAMETERS:
|
116 |
-
# # column = "Net Zero Intervention Parameters"
|
117 |
-
# # else:
|
118 |
-
# # column = None
|
119 |
-
|
120 |
-
# # test.export_results_to_excel(result, sheet_name=MODEL, filename=file_name, column=column )
|
121 |
-
# # st.session_state[result_key] = result
|
122 |
-
|
123 |
-
# # if st.session_state.get(result_key):
|
124 |
-
# # st.write(f"**Extracted Metrics for File {i+1}:**")
|
125 |
-
# # st.json(st.session_state[result_key])
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
+
|
4 |
from src.utils import streamlit_function
|
5 |
+
from src.utils.logger import get_logger
|
6 |
+
from src.services.mongo_db_service import retrieve_documents
|
7 |
+
from src.utils.common_functions import prepare_comparison_df
|
8 |
|
9 |
+
logger = get_logger()
|
10 |
streamlit_function.config_homepage()
|
11 |
|
12 |
+
st.title("📊 ESG Report Comparison Dashboard")
|
13 |
+
|
14 |
+
METRIC_OPTIONS = {
|
15 |
+
"Report Metadata": ["report_metadata"],
|
16 |
+
"Environmental Parameters": [
|
17 |
+
"Emissions", "Energy Consumption", "Water Withdrawal", "Water Discharge",
|
18 |
+
"Waste Generation", "Waste Disposal", "Waste Recovery"
|
19 |
+
],
|
20 |
+
"Social Parameters": [
|
21 |
+
"Human Rights Training Coverage", "LTIFR", "Other Safety Incidents",
|
22 |
+
"Health & Safety Training Coverage", "Grievances Reported",
|
23 |
+
"Third-party Assessment Coverage", "CSR Beneficiaries", "Female Wage Share",
|
24 |
+
"Wages by Location", "Well-being Cost", "Worker Well-being Coverage",
|
25 |
+
"Employee Well-being Coverage", "Turnover Count", "Workforce Gender Diversity"
|
26 |
+
],
|
27 |
+
"Governance Parameters": [
|
28 |
+
"Non-compliance Instances", "Disciplinary Actions", "Consumer Complaints",
|
29 |
+
"Customer Data Breaches", "Governance Diversity", "Purchase Concentration",
|
30 |
+
"Sales Concentration", "Related Party Transactions"
|
31 |
+
],
|
32 |
+
"Materiality": ["material_topics"]
|
33 |
+
}
|
34 |
+
|
35 |
+
ESG_EXTRACTOR_COLLECTION = "esg_report_extracts"
|
36 |
+
|
37 |
+
company_docs = retrieve_documents(collection_name=ESG_EXTRACTOR_COLLECTION)
|
38 |
+
available_company_data = [doc["_id"] for doc in company_docs if "_id" in doc]
|
39 |
+
|
40 |
+
selected_companies = st.multiselect(
|
41 |
+
"Select up to 3 companies",
|
42 |
+
options=available_company_data,
|
43 |
+
max_selections=3
|
44 |
+
)
|
45 |
+
|
46 |
+
def get_all_years(docs) -> list:
|
47 |
+
years = set()
|
48 |
+
for doc in docs:
|
49 |
+
if "esg_reports" in doc and isinstance(doc["esg_reports"], dict):
|
50 |
+
years.update(doc["esg_reports"].keys())
|
51 |
+
return sorted(years, reverse=True)
|
52 |
+
|
53 |
+
def highlight_missing_values(df):
|
54 |
+
return df.style.map(lambda v: "background-color: #ffe6e6" if pd.isna(v) or str(v).strip() in ["", "nan", "None", "Not Available","N/A"] else "background-color: #e6ffe6")
|
55 |
+
|
56 |
+
def extract_company_name_from_doc(doc, default_name):
|
57 |
+
return doc.get("report_metadata", {}).get("company_legal_name", default_name)
|
58 |
+
|
59 |
+
if selected_companies:
|
60 |
+
all_years = get_all_years(company_docs)
|
61 |
+
|
62 |
+
selected_year = st.selectbox(
|
63 |
+
"Select a report year (applies to all selected companies)",
|
64 |
+
options=["-- Select Year --"] + all_years,
|
65 |
+
key="common_year"
|
66 |
+
)
|
67 |
+
|
68 |
+
if selected_year != "-- Select Year --":
|
69 |
+
tabs = st.tabs(list(METRIC_OPTIONS.keys()))
|
70 |
+
metric_categories = list(METRIC_OPTIONS.keys())
|
71 |
+
for i, tab in enumerate(tabs):
|
72 |
+
with tab:
|
73 |
+
st.subheader(metric_categories[i])
|
74 |
+
metric_keys = METRIC_OPTIONS[metric_categories[i]]
|
75 |
+
for metric in metric_keys:
|
76 |
+
st.markdown(f"### {metric}")
|
77 |
+
|
78 |
+
comparison_df = prepare_comparison_df(
|
79 |
+
selected_companies,
|
80 |
+
selected_year,
|
81 |
+
metric,
|
82 |
+
company_docs
|
83 |
+
)
|
84 |
+
|
85 |
+
if comparison_df is not None:
|
86 |
+
st.dataframe(highlight_missing_values(comparison_df), use_container_width=True)
|
87 |
+
else:
|
88 |
+
st.warning(f"No data found for **{metric}** in {selected_year}")
|
89 |
+
else:
|
90 |
+
st.info("Please select a year to view report comparisons.")
|
91 |
+
else:
|
92 |
+
st.info("Please select at least one company to continue.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pages/database.py
DELETED
@@ -1,92 +0,0 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
import pandas as pd
|
3 |
-
|
4 |
-
from src.utils import streamlit_function
|
5 |
-
from src.utils.logger import get_logger
|
6 |
-
from src.services.mongo_db_service import retrieve_documents
|
7 |
-
from src.utils.common_functions import prepare_comparison_df
|
8 |
-
|
9 |
-
logger = get_logger()
|
10 |
-
streamlit_function.config_homepage()
|
11 |
-
|
12 |
-
st.title("📊 ESG Report Comparison Dashboard")
|
13 |
-
|
14 |
-
METRIC_OPTIONS = {
|
15 |
-
"Report Metadata": ["report_metadata"],
|
16 |
-
"Environmental Parameters": [
|
17 |
-
"Emissions", "Energy Consumption", "Water Withdrawal", "Water Discharge",
|
18 |
-
"Waste Generation", "Waste Disposal", "Waste Recovery"
|
19 |
-
],
|
20 |
-
"Social Parameters": [
|
21 |
-
"Human Rights Training Coverage", "LTIFR", "Other Safety Incidents",
|
22 |
-
"Health & Safety Training Coverage", "Grievances Reported",
|
23 |
-
"Third-party Assessment Coverage", "CSR Beneficiaries", "Female Wage Share",
|
24 |
-
"Wages by Location", "Well-being Cost", "Worker Well-being Coverage",
|
25 |
-
"Employee Well-being Coverage", "Turnover Count", "Workforce Gender Diversity"
|
26 |
-
],
|
27 |
-
"Governance Parameters": [
|
28 |
-
"Non-compliance Instances", "Disciplinary Actions", "Consumer Complaints",
|
29 |
-
"Customer Data Breaches", "Governance Diversity", "Purchase Concentration",
|
30 |
-
"Sales Concentration", "Related Party Transactions"
|
31 |
-
],
|
32 |
-
"Materiality": ["material_topics"]
|
33 |
-
}
|
34 |
-
|
35 |
-
ESG_EXTRACTOR_COLLECTION = "esg_report_extracts"
|
36 |
-
|
37 |
-
company_docs = retrieve_documents(collection_name=ESG_EXTRACTOR_COLLECTION)
|
38 |
-
available_company_data = [doc["_id"] for doc in company_docs if "_id" in doc]
|
39 |
-
|
40 |
-
selected_companies = st.multiselect(
|
41 |
-
"Select up to 3 companies",
|
42 |
-
options=available_company_data,
|
43 |
-
max_selections=3
|
44 |
-
)
|
45 |
-
|
46 |
-
def get_all_years(docs) -> list:
|
47 |
-
years = set()
|
48 |
-
for doc in docs:
|
49 |
-
if "esg_reports" in doc and isinstance(doc["esg_reports"], dict):
|
50 |
-
years.update(doc["esg_reports"].keys())
|
51 |
-
return sorted(years, reverse=True)
|
52 |
-
|
53 |
-
def highlight_missing_values(df):
|
54 |
-
return df.style.map(lambda v: "background-color: #ffe6e6" if pd.isna(v) or str(v).strip() in ["", "nan", "None", "Not Available","N/A"] else "background-color: #e6ffe6")
|
55 |
-
|
56 |
-
def extract_company_name_from_doc(doc, default_name):
|
57 |
-
return doc.get("report_metadata", {}).get("company_legal_name", default_name)
|
58 |
-
|
59 |
-
if selected_companies:
|
60 |
-
all_years = get_all_years(company_docs)
|
61 |
-
|
62 |
-
selected_year = st.selectbox(
|
63 |
-
"Select a report year (applies to all selected companies)",
|
64 |
-
options=["-- Select Year --"] + all_years,
|
65 |
-
key="common_year"
|
66 |
-
)
|
67 |
-
|
68 |
-
if selected_year != "-- Select Year --":
|
69 |
-
tabs = st.tabs(list(METRIC_OPTIONS.keys()))
|
70 |
-
metric_categories = list(METRIC_OPTIONS.keys())
|
71 |
-
for i, tab in enumerate(tabs):
|
72 |
-
with tab:
|
73 |
-
st.subheader(metric_categories[i])
|
74 |
-
metric_keys = METRIC_OPTIONS[metric_categories[i]]
|
75 |
-
for metric in metric_keys:
|
76 |
-
st.markdown(f"### {metric}")
|
77 |
-
|
78 |
-
comparison_df = prepare_comparison_df(
|
79 |
-
selected_companies,
|
80 |
-
selected_year,
|
81 |
-
metric,
|
82 |
-
company_docs
|
83 |
-
)
|
84 |
-
|
85 |
-
if comparison_df is not None:
|
86 |
-
st.dataframe(highlight_missing_values(comparison_df), use_container_width=True)
|
87 |
-
else:
|
88 |
-
st.warning(f"No data found for **{metric}** in {selected_year}")
|
89 |
-
else:
|
90 |
-
st.info("Please select a year to view report comparisons.")
|
91 |
-
else:
|
92 |
-
st.info("Please select at least one company to continue.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/utils/__pycache__/common_functions.cpython-313.pyc
CHANGED
Binary files a/src/utils/__pycache__/common_functions.cpython-313.pyc and b/src/utils/__pycache__/common_functions.cpython-313.pyc differ
|
|
src/utils/__pycache__/streamlit_function.cpython-313.pyc
CHANGED
Binary files a/src/utils/__pycache__/streamlit_function.cpython-313.pyc and b/src/utils/__pycache__/streamlit_function.cpython-313.pyc differ
|
|