dwb2023 committed on
Commit
6cf4016
·
1 Parent(s): 4b38188

update 2025

Browse files
pages/4_🗺️_Feb_2025_Navigator.py CHANGED
@@ -2,6 +2,27 @@ import streamlit as st
2
  import duckdb
3
  import pandas as pd
4
  from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  # Constants for raw data categories
7
  GDELT_CATEGORIES = {
@@ -38,7 +59,7 @@ def initialize_db():
38
  return con
39
 
40
  def fetch_data(con, source_filter=None,
41
- start_date=None, end_date=None, limit=10, include_all_columns=False):
42
  """Fetch filtered data from the database"""
43
  if include_all_columns:
44
  columns = "*"
@@ -107,6 +128,24 @@ def render_data_grid(df):
107
  return selected[0]
108
  return None
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  def render_raw_data(record):
111
  """Render raw GDELT data in expandable sections."""
112
  st.header("Full Record Details")
@@ -138,7 +177,7 @@ def main():
138
  source = st.text_input("Filter by source name")
139
  start_date = st.text_input("Start date (YYYYMMDD)", "20250210")
140
  end_date = st.text_input("End date (YYYYMMDD)", "20250211")
141
- limit = st.slider("Number of results to display", 10, 500, 10)
142
 
143
  # Fetch initial data view
144
  df_initial = fetch_data(
@@ -171,8 +210,9 @@ def main():
171
  # Find the full record in the original DataFrame using the selected ID
172
  selected_id = selected_row['ID']
173
  full_record = df_full[df_full['GKGRECORDID'] == selected_id].iloc[0]
174
-
175
- # Display the raw data below the grid
 
176
  render_raw_data(full_record)
177
  else:
178
  st.info("Select a record above to view its complete details.")
@@ -182,4 +222,4 @@ def main():
182
  # Close database connection
183
  con.close()
184
 
185
- main()
 
2
  import duckdb
3
  import pandas as pd
4
  from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
5
+ from st_link_analysis import st_link_analysis, NodeStyle, EdgeStyle
6
+ from graph_builder import StLinkBuilder
7
+
8
+ # Node styles configuration
9
+ NODE_STYLES = [
10
+ NodeStyle("EVENT", "#FF7F3E", "name", "description"),
11
+ NodeStyle("PERSON", "#4CAF50", "name", "person"),
12
+ NodeStyle("NAME", "#2A629A", "created_at", "badge"),
13
+ NodeStyle("ORGANIZATION", "#9C27B0", "name", "business"),
14
+ NodeStyle("LOCATION", "#2196F3", "name", "place"),
15
+ NodeStyle("THEME", "#FFC107", "name", "sell"),
16
+ NodeStyle("COUNT", "#795548", "name", "inventory"),
17
+ NodeStyle("AMOUNT", "#607D8B", "name", "wallet"),
18
+ ]
19
+
20
+ # Edge styles configuration
21
+ EDGE_STYLES = [
22
+ EdgeStyle("MENTIONED_IN", caption="label", directed=True),
23
+ EdgeStyle("LOCATED_IN", caption="label", directed=True),
24
+ EdgeStyle("CATEGORIZED_AS", caption="label", directed=True)
25
+ ]
26
 
27
  # Constants for raw data categories
28
  GDELT_CATEGORIES = {
 
59
  return con
60
 
61
  def fetch_data(con, source_filter=None,
62
+ start_date=None, end_date=None, limit=100, include_all_columns=False):
63
  """Fetch filtered data from the database"""
64
  if include_all_columns:
65
  columns = "*"
 
128
  return selected[0]
129
  return None
130
 
131
+ def render_graph(record):
132
+ """
133
+ Render a graph visualization for the selected record.
134
+ Uses StLinkBuilder to convert the record into graph format and then
135
+ displays the graph using st_link_analysis.
136
+ """
137
+ st.subheader(f"Event Graph: {record.get('GKGRECORDID', 'Unknown')}")
138
+ stlink_builder = StLinkBuilder()
139
+ # Convert the record (a Series) into a DataFrame with one row
140
+ record_df = pd.DataFrame([record])
141
+ graph_data = stlink_builder.build_graph(record_df)
142
+ return st_link_analysis(
143
+ elements=graph_data,
144
+ layout="fcose", # Graph layout; options include cose, fcose, breadthfirst, cola
145
+ node_styles=NODE_STYLES,
146
+ edge_styles=EDGE_STYLES
147
+ )
148
+
149
  def render_raw_data(record):
150
  """Render raw GDELT data in expandable sections."""
151
  st.header("Full Record Details")
 
177
  source = st.text_input("Filter by source name")
178
  start_date = st.text_input("Start date (YYYYMMDD)", "20250210")
179
  end_date = st.text_input("End date (YYYYMMDD)", "20250211")
180
+ limit = st.slider("Number of results to display", 10, 500, 100)
181
 
182
  # Fetch initial data view
183
  df_initial = fetch_data(
 
210
  # Find the full record in the original DataFrame using the selected ID
211
  selected_id = selected_row['ID']
212
  full_record = df_full[df_full['GKGRECORDID'] == selected_id].iloc[0]
213
+
214
+ # Display the graph and raw data below the grid
215
+ render_graph(full_record)
216
  render_raw_data(full_record)
217
  else:
218
  st.info("Select a record above to view its complete details.")
 
222
  # Close database connection
223
  con.close()
224
 
225
+ main()
pages/5_🔍_Feb_2025_Event_Graph.py CHANGED
@@ -50,7 +50,8 @@ def fetch_data(con, source_filter=None,
50
  if include_all_columns:
51
  columns = "*"
52
  else:
53
- columns = "GKGRECORDID, DATE, SourceCommonName, tone, DocumentIdentifier, 'V2.1Quotations', SourceCollectionIdentifier"
 
54
 
55
  query = f"""
56
  SELECT {columns}
@@ -139,6 +140,7 @@ def main():
139
  Filter and select individual event records to display their detailed graph representations. Analyze relationships between events and associated entities using the interactive graph below.
140
  """)
141
 
 
142
  # Initialize database connection using context manager
143
  with initialize_db() as con:
144
  if con is not None:
@@ -151,7 +153,7 @@ def main():
151
  start_date = st.text_input("Start date (YYYYMMDD)", "20250210")
152
  end_date = st.text_input("End date (YYYYMMDD)", "20250211")
153
  limit = st.slider("Number of results to display", 10, 500, 100)
154
-
155
  # Fetch initial data view
156
  df_initial = fetch_data(
157
  con=con,
@@ -171,20 +173,20 @@ def main():
171
  limit=limit,
172
  include_all_columns=True
173
  )
174
-
175
  # Create a DataFrame for the grid with only the key columns
176
- grid_df = df_initial[['GKGRECORDID', 'DATE', 'SourceCommonName', 'tone', 'DocumentIdentifier', 'SourceCollectionIdentifier']].copy()
177
- grid_df.columns = ['ID', 'Date', 'Source', 'Tone', 'Doc ID', 'Source Collection ID']
178
 
179
  # Render the interactive data grid at the top
180
  selected_row = render_data_grid(grid_df)
181
-
182
  if selected_row:
183
  # Find the full record in the original DataFrame using the selected ID
184
  selected_id = selected_row['ID']
185
  full_record = df_full[df_full['GKGRECORDID'] == selected_id].iloc[0]
186
-
187
- # Display the graph and raw data below the grid
188
  render_graph(full_record)
189
  else:
190
  st.info("Use the grid filters above to search and select a record.")
 
50
  if include_all_columns:
51
  columns = "*"
52
  else:
53
+ # Column names containing periods must be double-quoted so SQL treats them as identifiers, not string literals.
54
+ columns = 'GKGRECORDID, DATE, SourceCommonName, tone, DocumentIdentifier, "V2.1SharingImage", "V2.1Quotations", SourceCollectionIdentifier'
55
 
56
  query = f"""
57
  SELECT {columns}
 
140
  Filter and select individual event records to display their detailed graph representations. Analyze relationships between events and associated entities using the interactive graph below.
141
  """)
142
 
143
+
144
  # Initialize database connection using context manager
145
  with initialize_db() as con:
146
  if con is not None:
 
153
  start_date = st.text_input("Start date (YYYYMMDD)", "20250210")
154
  end_date = st.text_input("End date (YYYYMMDD)", "20250211")
155
  limit = st.slider("Number of results to display", 10, 500, 100)
156
+
157
  # Fetch initial data view
158
  df_initial = fetch_data(
159
  con=con,
 
173
  limit=limit,
174
  include_all_columns=True
175
  )
176
+
177
  # Create a DataFrame for the grid with only the key columns
178
+ grid_df = df_initial[['GKGRECORDID', 'DATE', 'SourceCommonName', 'tone', 'DocumentIdentifier', "V2.1SharingImage", 'SourceCollectionIdentifier']].copy()
179
+ grid_df.columns = ['ID', 'Date', 'Source', 'Tone', 'Doc ID', 'Image', 'Source Collection ID']
180
 
181
  # Render the interactive data grid at the top
182
  selected_row = render_data_grid(grid_df)
183
+
184
  if selected_row:
185
  # Find the full record in the original DataFrame using the selected ID
186
  selected_id = selected_row['ID']
187
  full_record = df_full[df_full['GKGRECORDID'] == selected_id].iloc[0]
188
+
189
+ # Display the graph below the grid
190
  render_graph(full_record)
191
  else:
192
  st.info("Use the grid filters above to search and select a record.")