alemem64 committed on
Commit
49dd9ca
·
1 Parent(s): 72535ab

20250312 add app

Browse files
Files changed (2) hide show
  1. app.py +982 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,982 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import json
4
+ import colorsys
5
+ import folium
6
+ import gradio as gr
7
+ from datetime import datetime
8
+ import os
9
+ from functools import lru_cache
10
+ import geopandas as gpd
11
+ from shapely.geometry import Point
12
+ from folium import plugins
13
+ import zipfile
14
+ import tempfile
15
+ import shutil
16
+
17
# NOTE(review): SEED is not referenced anywhere in the visible code; the
# sampling calls hard-code random_state=42 instead -- confirm before removing.
SEED = 42

# Module-level state shared by the upload handlers and the map callbacks.
# Each stays None until an upload handler or load_default_data() fills it.
df = None           # path/order records
cluster_df = None   # cluster records
regions_gdf = None  # administrative-region geometries

# Shapefile used for region lookups; replaced when a shapefile ZIP is uploaded.
current_shp_path = 'data/gadm41_KOR_shp/gadm41_KOR_3.shp'
26
+
27
def process_upload(file_obj):
    """Load an uploaded path CSV into the module-level ``df``.

    The file is read with ``utf-8``, ``cp949`` and ``euc-kr`` in turn and the
    first encoding that decodes is used.  Rows whose ``name`` is null are
    dropped before the global is replaced.

    Args:
        file_obj: Upload handle exposing the on-disk path as ``.name``, or
            ``None`` when nothing was uploaded.

    Returns:
        ``(status_message, file_name)``.  ``file_name`` is ``None`` whenever
        the upload was rejected or failed; ``df`` is left untouched then.
    """
    global df  # declare that the module-level frame is being replaced
    if file_obj is None:
        return "No file uploaded.", None

    try:
        file_path = file_obj.name
        file_name = os.path.basename(file_path)
        _, ext = os.path.splitext(file_path)
        if ext.lower() != '.csv':
            return "Please upload a CSV file.", None

        # Try different encodings; only a decode failure moves on to the next.
        for encoding in ['utf-8', 'cp949', 'euc-kr']:
            try:
                temp_df = pd.read_csv(file_path, engine='python', encoding=encoding)
            except UnicodeDecodeError:
                continue
            except Exception as e:
                return f"Error processing file with {encoding} encoding: {str(e)}", None

            # Without this check dropna() raises KeyError and the user only
            # sees the unhelpful text "['name']".
            if 'name' not in temp_df.columns:
                return (f"Error processing file with {encoding} encoding: "
                        "required column 'name' is missing."), None

            # Remove rows where 'name' is null
            original_len = len(temp_df)
            temp_df = temp_df.dropna(subset=['name'])
            rows_dropped = original_len - len(temp_df)

            # Update the global df
            df = temp_df

            return (f"File uploaded and processed successfully. {len(df)} records loaded "
                    f"with {encoding} encoding. {rows_dropped} rows with null names were removed."), file_name

        return "Could not process the file with any of the supported encodings.", None
    except Exception as e:
        return f"Error processing upload: {str(e)}", None
61
+
62
def process_cluster_upload(file_obj):
    """Load an uploaded cluster CSV into the module-level ``cluster_df``.

    The file is read with ``utf-8``, ``cp949`` and ``euc-kr`` in turn and the
    first encoding that decodes is used.  The frame is only accepted when it
    has every column the map renderer indexes, so a malformed upload cannot
    replace a working ``cluster_df`` and break later map generation.

    Args:
        file_obj: Upload handle exposing the on-disk path as ``.name``, or
            ``None`` when nothing was uploaded.

    Returns:
        ``(status_message, file_name)``.  ``file_name`` is ``None`` whenever
        the upload was rejected or failed.
    """
    global cluster_df  # declare that the module-level frame is being replaced
    if file_obj is None:
        return "No cluster file uploaded.", None

    # Columns read unconditionally by plot_paths_folium().
    required_columns = ('t_pickup', 'company', 'cluster_geo', 'cluster_pmi',
                        'latitude', 'longitude')

    try:
        file_path = file_obj.name
        file_name = os.path.basename(file_path)
        _, ext = os.path.splitext(file_path)
        if ext.lower() != '.csv':
            return "Please upload a CSV file.", None

        # Try different encodings; only a decode failure moves on to the next.
        for encoding in ['utf-8', 'cp949', 'euc-kr']:
            try:
                temp_df = pd.read_csv(file_path, engine='python', encoding=encoding)
            except UnicodeDecodeError:
                continue
            except Exception as e:
                return f"Error processing cluster file with {encoding} encoding: {str(e)}", None

            missing = [col for col in required_columns if col not in temp_df.columns]
            if missing:
                return (f"Error processing cluster file with {encoding} encoding: "
                        f"missing required column(s): {', '.join(missing)}"), None

            # Update the global cluster_df
            cluster_df = temp_df

            return (f"Cluster file uploaded and processed successfully. {len(cluster_df)} records "
                    f"loaded with {encoding} encoding."), file_name

        return "Could not process the cluster file with any of the supported encodings.", None
    except Exception as e:
        return f"Error processing cluster upload: {str(e)}", None
92
+
93
def process_shp_upload(file_obj):
    """Install an uploaded shapefile ZIP as the active region layer.

    The archive is extracted to a temporary directory, the first ``.shp``
    outside any ``__MACOSX`` folder (in sorted path order) is read and
    reprojected to EPSG:4326, and it is copied with its sidecar files into a
    fresh directory under ``data/uploaded_shapefiles``.  The module-level
    ``regions_gdf`` and ``current_shp_path`` are replaced only after all of
    that succeeded.

    Args:
        file_obj: Upload handle exposing the on-disk path as ``.name``, or
            ``None`` when nothing was uploaded.

    Returns:
        ``(status_message, file_name)``.  ``file_name`` is ``None`` whenever
        the upload was rejected or failed.
    """
    global regions_gdf, current_shp_path
    if file_obj is None:
        return "No file uploaded.", None

    sidecar_exts = {'.shp', '.shx', '.dbf', '.prj', '.cpg', '.sbn', '.sbx'}

    try:
        file_path = file_obj.name
        file_name = os.path.basename(file_path)
        _, ext = os.path.splitext(file_path)
        if ext.lower() != '.zip':
            return "Please upload a ZIP file containing shapefile components.", None

        # Create a temporary directory to extract files
        with tempfile.TemporaryDirectory() as temp_dir:
            with zipfile.ZipFile(file_path, 'r') as zip_ref:
                zip_ref.extractall(temp_dir)

            # Find .shp files, skipping macOS resource-fork folders.  The
            # match is case-insensitive (".SHP" is common) and the result is
            # sorted so the chosen file does not depend on walk order.
            shp_files = sorted(
                os.path.join(root, fname)
                for root, _, files in os.walk(temp_dir)
                if '__MACOSX' not in root
                for fname in files
                if fname.lower().endswith('.shp')
            )

            if not shp_files:
                return "No .shp file found in the ZIP archive.", None

            # Use the first .shp file found
            shp_path = shp_files[0]

            try:
                # Read into a local first so a later failure cannot leave the
                # globals half-updated.
                loaded_gdf = gpd.read_file(shp_path).to_crs("EPSG:4326")

                # Create a permanent directory for the shapefiles if needed
                permanent_dir = os.path.join('data', 'uploaded_shapefiles')
                os.makedirs(permanent_dir, exist_ok=True)

                # Timestamped subdirectory; add a counter when two uploads
                # land within the same second.
                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                base_dir = os.path.join(permanent_dir, f'shapefile_{timestamp}')
                target_dir = base_dir
                attempt = 0
                while os.path.exists(target_dir):
                    attempt += 1
                    target_dir = f'{base_dir}_{attempt}'
                os.makedirs(target_dir)

                # Copy the .shp and every sidecar sharing its stem, whatever
                # the case of the extension.
                shp_dir, shp_name = os.path.split(shp_path)
                shp_stem = os.path.splitext(shp_name)[0]
                for fname in os.listdir(shp_dir):
                    stem, file_ext = os.path.splitext(fname)
                    if stem == shp_stem and file_ext.lower() in sidecar_exts:
                        shutil.copy2(os.path.join(shp_dir, fname), target_dir)

                # Commit: point the globals at the permanent copy.
                regions_gdf = loaded_gdf
                current_shp_path = os.path.join(target_dir, shp_name)

                return (f"Shapefile uploaded and processed successfully. "
                        f"{len(regions_gdf)} features loaded."), file_name

            except Exception as e:
                return f"Error processing shapefile: {str(e)}", None

    except Exception as e:
        return f"Error processing ZIP upload: {str(e)}", None
158
+
159
+
160
def print_route_info(df, shp_file_path, sample_checkbox=False, path_checkbox=False):
    """Build a text report of the routes in *df*.

    Despite the name nothing is printed; the report is returned.

    Args:
        df: Frame whose rows provide ``created``, ``name`` and
            ``vehicle_type`` (read when *sample_checkbox* is set) and
            ``route`` (read when *path_checkbox* is set; either an already
            decoded list/dict or a JSON string).
        shp_file_path: Region shapefile used to name each route point.  Only
            read when at least one route yields coordinates.
        sample_checkbox: Include the per-row header lines.
        path_checkbox: Include the per-point location lines.

    Returns:
        The report lines joined with newlines; every row ends with a
        50-dash separator.
    """
    output_lines = []
    regions = None  # loaded on first use, then reused for every row

    for _, row in df.iterrows():
        if sample_checkbox:
            date_str = pd.to_datetime(row['created']).strftime('%Y-%m-%d %H:%M:%S')
            output_lines.append(f"\nSample: {row['name']} ({date_str})")
            output_lines.append(f" - Vehicle: {row['vehicle_type']}")

        if path_checkbox:
            route = row['route'] if isinstance(row['route'], (dict, list)) else json.loads(row['route'])
            output_lines.append(" - Path list:")

            # Same coordinate rules as cal_paths_folium: accept either key
            # pair and rescale by 360000 only when the values cannot be
            # plain degrees.
            coords = []
            for loc in route:
                if not isinstance(loc, dict):
                    continue
                if 'latitude' in loc and 'longitude' in loc:
                    lat, lng = float(loc['latitude']), float(loc['longitude'])
                elif 'lat' in loc and 'lng' in loc:
                    lat, lng = float(loc['lat']), float(loc['lng'])
                else:
                    continue
                if abs(lat) > 90 or abs(lng) > 180:
                    lat /= 360000.0
                    lng /= 360000.0
                coords.append((lat, lng))

            if coords:
                gdf_sample = gpd.GeoDataFrame(
                    geometry=[Point(lon, lat) for lat, lon in coords],
                    crs="EPSG:4326"
                )

                # Reading the shapefile is loop-invariant; do it once.
                if regions is None:
                    regions = gpd.read_file(shp_file_path).to_crs("EPSG:4326")

                # Join with regions
                joined = gpd.sjoin(gdf_sample, regions, how="left", predicate="within")

                # Use whichever of the region-name columns the layer has
                location_columns = [col for col in ['NAME_1', 'NAME_2', 'NAME_3', 'TYPE_3']
                                    if col in joined.columns]

                if location_columns:
                    joined['location'] = joined[location_columns].astype(str).apply(
                        lambda x: "_".join(str(val) for val in x), axis=1
                    )
                else:
                    # Fallback to coordinates if no matching columns found
                    joined['location'] = joined.geometry.apply(
                        lambda x: f"lat: {x.y:.6f}, lon: {x.x:.6f}"
                    )

                for _, point in joined.iterrows():
                    output_lines.append(f" - {point['location']}")

        output_lines.append("-" * 50)

    return "\n".join(output_lines)
218
+
219
def get_colors(n, s=1.0, v=1.0):
    """Return *n* hex colours with hues evenly spaced around the HSV wheel.

    Args:
        n: Number of colours; ``0`` yields an empty list.
        s: Saturation shared by every colour.
        v: Value (brightness) shared by every colour.

    Returns:
        A list of ``'#rrggbb'`` strings.  Channels are truncated, not
        rounded, when scaled to 0-255.
    """
    def to_hex(rgb):
        r, g, b = rgb
        return f'#{int(r*255):02x}{int(g*255):02x}{int(b*255):02x}'

    return [to_hex(colorsys.hsv_to_rgb(i / n, s, v)) for i in range(n)]
228
+
229
def _route_coordinates(route):
    """Return the ``[lat, lng]`` pairs found in a decoded route."""
    coords = []
    for loc in route:
        if not isinstance(loc, dict):
            continue
        if 'latitude' in loc and 'longitude' in loc:
            lat, lng = float(loc['latitude']), float(loc['longitude'])
        elif 'lat' in loc and 'lng' in loc:
            lat, lng = float(loc['lat']), float(loc['lng'])
        else:
            continue

        # Values outside the valid degree range are rescaled by 360000.
        if abs(lat) > 90 or abs(lng) > 180:
            lat /= 360000.0
            lng /= 360000.0

        coords.append([lat, lng])
    return coords


def cal_paths_folium(df, shp_file_path, n_samples=None, start_d=None, end_d=None, company=None,
                     sample_checkbox=False, path_checkbox=False):
    """Filter and sample *df*, then turn each row's route into map data.

    Args:
        df: Frame with ``created``, ``name`` and ``route`` columns.  It is
            copied, never modified.
        shp_file_path: Region shapefile, passed to ``print_route_info`` when
            either checkbox is set.
        n_samples: Maximum number of rows to keep (fixed random_state of 42).
        start_d: Inclusive lower bound on ``created`` (whole day).
        end_d: Inclusive upper bound on ``created`` (through 23:59:59).
        company: Literal substring that ``name`` must contain.
        sample_checkbox: Include per-row headers in the log.
        path_checkbox: Include per-point locations in the log.

    Returns:
        ``(routes, log_text)``.  ``routes`` is a list of dicts with the keys
        ``coordinates``, ``color``, ``company`` and ``created``; rows without
        usable coordinates are left out.
    """
    log_messages = []
    working_df = df.copy()
    log_messages.append(f"Initial dataframe size: {len(working_df)} rows")

    # Convert created column to datetime and remove timezone information
    working_df['created'] = pd.to_datetime(working_df['created']).dt.tz_localize(None)

    # Date filtering; a bad bound is logged and that filter is skipped.
    if start_d:
        try:
            start_d = pd.to_datetime(start_d).normalize()
            log_messages.append(f"Filtering from date: {start_d}")
            working_df = working_df[working_df['created'] >= start_d]
            log_messages.append(f"After start date filter: {len(working_df)} rows")
        except Exception as e:
            log_messages.append(f"Error in start date filtering: {str(e)}")

    if end_d:
        try:
            end_d = pd.to_datetime(end_d).normalize() + pd.Timedelta(days=1) - pd.Timedelta(seconds=1)
            log_messages.append(f"Filtering until date: {end_d}")
            working_df = working_df[working_df['created'] <= end_d]
            log_messages.append(f"After end date filter: {len(working_df)} rows")
        except Exception as e:
            log_messages.append(f"Error in end date filtering: {str(e)}")

    # Company filtering.  regex=False: the search box holds plain text, and
    # names such as "(주)..." contain regex metacharacters that would
    # otherwise mis-match or raise.
    if company and company.strip():
        try:
            log_messages.append(f"Filtering for company: {company}")
            working_df = working_df[working_df['name'].str.contains(company, na=False, regex=False)]
            log_messages.append(f"After company filter: {len(working_df)} rows")
        except Exception as e:
            log_messages.append(f"Error in company filtering: {str(e)}")

    # Sample n
    if n_samples and len(working_df) > 0:
        working_df = working_df.sample(n=min(int(n_samples), len(working_df)), random_state=42)
        log_messages.append(f"After sampling: {len(working_df)} rows")

    # Column names and one row, for debugging
    log_messages.append(f"Columns in dataframe: {list(working_df.columns)}")
    if len(working_df) > 0:
        log_messages.append("First row sample:")
        log_messages.append(str(working_df.iloc[0]))

    # Generate colors
    colors = get_colors(max(1, len(working_df)), s=0.5, v=1.0)

    # Route information report
    if sample_checkbox or path_checkbox:
        console_output = print_route_info(working_df, shp_file_path, sample_checkbox, path_checkbox)
        log_messages.append(console_output)

    # Generate route data
    routes = []
    for i, (row_label, row) in enumerate(working_df.iterrows()):
        raw_route = row['route']
        try:
            # Convert route to dict/list if it's a string
            route = raw_route if isinstance(raw_route, (dict, list)) else json.loads(raw_route)
            coords = _route_coordinates(route)
        except (TypeError, ValueError) as e:
            # One malformed cell (NaN, broken JSON, non-numeric coordinate)
            # must not discard every other route.
            log_messages.append(f"Skipping row {row_label}: invalid route data ({e})")
            continue

        if coords:
            created = row['created']
            routes.append({
                'coordinates': coords,
                'color': colors[i % len(colors)],
                'company': str(row.get('name', 'Unknown')),
                # NaT has no strftime; show an empty timestamp instead.
                'created': created.strftime('%Y-%m-%d %H:%M:%S') if pd.notna(created) else ''
            })

    print(f"Generated {len(routes)} valid routes")
    log_messages.append(f"Generated {len(routes)} valid routes")

    # Return the log messages together with the routes
    return routes, "\n".join(log_messages)
334
+
335
def _nearest_neighbour_loop(points):
    """Order *points* greedily by nearest neighbour and close the loop."""
    if len(points) <= 1:
        return points

    path = [points[0]]
    remaining = list(points[1:])
    while remaining:
        cur_lat, cur_lng = path[-1]
        # Squared distance gives the same ordering as the true distance.
        closest = min(
            range(len(remaining)),
            key=lambda k: (cur_lat - remaining[k][0]) ** 2 + (cur_lng - remaining[k][1]) ** 2,
        )
        path.append(remaining.pop(closest))

    # Connect back to the first point to close the path
    path.append(path[0])
    return path


def _legend_row(symbol, label, symbol_style=""):
    """Return one legend line: a 20px symbol cell followed by *label*."""
    return f'''
    <div style="display: flex; align-items: center; margin: 5px 0;">
        <div style="width: 20px; height: 20px; margin-right: 5px; flex-shrink: 0;
                    display: flex; align-items: center; justify-content: center;{symbol_style}">
            {symbol}
        </div>
        <span style="word-break: break-all;">
            {label}
        </span>
    </div>
    '''


def _legend_section(title, rows, margin_bottom=True):
    """Return one titled, scrollable legend box containing *rows*."""
    margin = "margin-bottom: 5px; " if margin_bottom else ""
    return f'''
    <!-- {title} Legend -->
    <div style="{margin}background-color: white; border: 2px solid grey; font-size: 10px;">
        <div style="padding: 5px; background-color: #f0f0f0; font-weight: bold;">{title}</div>
        <div style="height: 200px; overflow-y: auto; padding: 10px;">
            {"".join(rows)}
        </div>
    </div>
    '''


def plot_paths_folium(routes, cluster_df=None, cluster_num_samples=None, cluster_company_search=None,
                      cluster_date_start=None, cluster_date_end=None, map_location="Seoul",
                      map_type="Satellite map", path_type="point+line", brightness=100):
    """Render routes and cluster points as a Folium map and return its HTML.

    Args:
        routes: Dicts with ``coordinates`` (``[lat, lng]`` pairs) and
            ``color``, optionally ``company`` and ``created``.
        cluster_df: Frame with ``t_pickup``, ``company``, ``cluster_geo``,
            ``cluster_pmi``, ``latitude`` and ``longitude``.  It is copied,
            never modified.  With ``None`` only the routes are drawn.
        cluster_num_samples: Maximum cluster rows to draw (random_state 42).
        cluster_company_search: Literal substring ``company`` must contain.
        cluster_date_start: Inclusive lower bound on ``t_pickup`` (whole day).
        cluster_date_end: Inclusive upper bound on ``t_pickup``.
        map_location: ``"Korea"``, ``"Seoul"`` or ``"Busan"``; anything else
            falls back to the Korea view.
        map_type: ``"Satellite map"`` selects Esri imagery; any other value
            uses the default tiles.
        path_type: ``"point"``, ``"line"`` or ``"point+line"``.
        brightness: Tile brightness in percent, applied through CSS.

    Returns:
        The map as an HTML string.
    """
    from html import escape  # labels come from uploaded CSVs; never trust them as markup

    # Map center coordinates and zoom based on location selection
    centers = {
        "Korea": (36.5, 127.5),
        "Seoul": (37.5665, 126.9780),
        "Busan": (35.1796, 129.0756)
    }
    zoom_levels = {
        "Korea": 7,
        "Seoul": 12,
        "Busan": 12
    }
    center = centers.get(map_location, centers["Korea"])
    zoom_start = zoom_levels.get(map_location, 7)

    # Create map with appropriate type
    if map_type == "Satellite map":
        m = folium.Map(location=center, zoom_start=zoom_start,
                       tiles='https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
                       attr='Esri')
    else:
        m = folium.Map(location=center, zoom_start=zoom_start)

    path_fg = folium.FeatureGroup(name="Path").add_to(m)

    # Add routes to the map
    for route in routes:
        route_label = escape(str(route.get('company', 'Unknown')))

        if path_type in ["point", "point+line"] and len(route['coordinates']) > 0:
            x_icon_html = f'''
            <div style="
                color: {route['color']};
                font-weight: bold;
                font-size: 10px;
                transform: translate(2px, -3px);">
                ×
            </div>
            '''
            for i, coord in enumerate(route['coordinates']):
                marker = folium.Marker(
                    location=coord,
                    popup=f"{route_label} - Point {i+1}"
                ).add_to(path_fg)
                folium.DivIcon(html=x_icon_html).add_to(marker)

        if path_type in ["line", "point+line"]:
            folium.PolyLine(
                route['coordinates'],
                color=route['color'],
                weight=0.5,
                dash_array='1, 1',  # dotted line style (dash length, gap)
                popup=route_label
            ).add_to(path_fg)

    # Work on a private copy: the caller usually passes the shared global
    # frame, and the t_pickup conversion below must not leak back into it.
    if cluster_df is None:
        cluster_df = pd.DataFrame(columns=['t_pickup', 'company', 'cluster_geo',
                                           'cluster_pmi', 'latitude', 'longitude'])
    else:
        cluster_df = cluster_df.copy()

    cluster_df['t_pickup'] = pd.to_datetime(cluster_df['t_pickup'])
    if cluster_date_start:
        cluster_date_start = pd.to_datetime(cluster_date_start).normalize()
        cluster_df = cluster_df[cluster_df['t_pickup'] >= cluster_date_start]

    if cluster_date_end:
        cluster_date_end = (pd.to_datetime(cluster_date_end).normalize()
                            + pd.Timedelta(days=1) - pd.Timedelta(seconds=1))
        cluster_df = cluster_df[cluster_df['t_pickup'] <= cluster_date_end]

    if cluster_company_search:
        # Literal match; na=False keeps rows with a missing company from
        # turning the mask into something that cannot index the frame.
        matches = cluster_df['company'].str.contains(cluster_company_search, regex=False, na=False)
        cluster_df = cluster_df[matches]

    if cluster_num_samples and len(cluster_df) > 0:
        cluster_df = cluster_df.sample(n=min(int(cluster_num_samples), len(cluster_df)), random_state=42)

    cluster_geo_fg = folium.FeatureGroup(name="Cluster Geo").add_to(m)
    cluster_pmi_fg = folium.FeatureGroup(name="Cluster PMI", show=False).add_to(m)

    # Map each cluster id to a colour index, in sorted id order
    cluster_geo_mapping = {val: idx for idx, val in enumerate(sorted(cluster_df['cluster_geo'].unique()))}
    cluster_pmi_mapping = {val: idx for idx, val in enumerate(sorted(cluster_df['cluster_pmi'].unique()))}
    cluster_geo_colors = get_colors(len(cluster_geo_mapping))
    cluster_pmi_colors = get_colors(len(cluster_pmi_mapping))

    # Draw the markers and collect each cluster's points in the same pass
    geo_clusters = {}
    pmi_clusters = {}
    for _, row in cluster_df.iterrows():
        location = (row['latitude'], row['longitude'])
        company_label = escape(str(row['company']))
        geo_id = row['cluster_geo']
        pmi_id = row['cluster_pmi']
        geo_color = cluster_geo_colors[cluster_geo_mapping[geo_id]]
        pmi_color = cluster_pmi_colors[cluster_pmi_mapping[pmi_id]]

        # Geo clusters are drawn as circles
        folium.CircleMarker(
            location=location,
            popup=f"{company_label} - Cluster {escape(str(geo_id))}",
            radius=3,
            color=geo_color,
            fill=True,
            fill_color=geo_color,
        ).add_to(cluster_geo_fg)

        # PMI clusters are drawn as stars
        star_html = f'''
        <div style="
            color: {pmi_color};
            font-size: 16px;
            transform: translate(-1px, -7px);
            text-shadow: 1px 1px 2px black;">
            ★
        </div>
        '''
        star_marker = folium.Marker(
            location=location,
            popup=f"{company_label} - Cluster {escape(str(pmi_id))}",
        ).add_to(cluster_pmi_fg)
        folium.DivIcon(html=star_html).add_to(star_marker)

        geo_clusters.setdefault(geo_id, []).append(location)
        pmi_clusters.setdefault(pmi_id, []).append(location)

    # Outline every cluster that has at least two points
    for cluster_num, points in geo_clusters.items():
        if len(points) >= 2:
            folium.PolyLine(
                _nearest_neighbour_loop(points),
                color=cluster_geo_colors[cluster_geo_mapping[cluster_num]],
                weight=2,
            ).add_to(cluster_geo_fg)

    for cluster_num, points in pmi_clusters.items():
        if len(points) >= 2:
            folium.PolyLine(
                _nearest_neighbour_loop(points),
                color=cluster_pmi_colors[cluster_pmi_mapping[cluster_num]],
                weight=2,
            ).add_to(cluster_pmi_fg)

    # Legend: three scrollable sections in a fixed panel
    path_rows = [
        _legend_row(
            "×",
            f"{escape(str(route.get('company', 'Unknown')))}_{escape(str(route.get('created', '')))}",
            f" color: {route['color']}; font-weight: bold; font-size: 20px;",
        )
        for route in routes
    ]
    geo_rows = [
        _legend_row(
            f'<div style="width: 10px; height: 10px; '
            f'background-color: {cluster_geo_colors[idx]}; border-radius: 50%;"></div>',
            f"Cluster {escape(str(cluster_value))}",
        )
        for cluster_value, idx in cluster_geo_mapping.items()
    ]
    pmi_rows = [
        _legend_row(
            "★",
            f"Cluster {escape(str(cluster_value))}",
            f" color: {cluster_pmi_colors[idx]}; font-size: 18px; text-shadow: 1px 1px 2px black;",
        )
        for cluster_value, idx in cluster_pmi_mapping.items()
    ]
    legend_html = (
        '''
        <div style="position: fixed;
                    top: 120px;
                    right: 10px;
                    width: 200px;
                    background-color: transparent;
                    z-index: 1000;">
        '''
        + _legend_section("Path Routes", path_rows)
        + _legend_section("Cluster Geo", geo_rows)
        + _legend_section("Cluster PMI", pmi_rows, margin_bottom=False)
        + '''
        </div>
        '''
    )

    folium.LayerControl(collapsed=False).add_to(m)

    folium.plugins.Fullscreen(
        position="bottomright",
        title="Expand me",
        title_cancel="Exit me",
        force_separate_button=True,
    ).add_to(m)

    # Add the legend to the map
    m.get_root().html.add_child(folium.Element(legend_html))

    # Brightness is applied to the tile images only
    custom_css = f"""
    <style>
    .leaflet-tile-pane img {{
        filter: brightness({brightness}%);
    }}
    </style>
    """
    m.get_root().header.add_child(folium.Element(custom_css))

    return m._repr_html_()
690
+
691
+
692
def update_map(map_location, map_type, path_type, n_samples, company, date_start, date_end,
               cluster_num_samples, cluster_company_search, cluster_date_start, cluster_date_end,
               pick_all_date, sample_checkbox, path_checkbox, brightness_slider):
    """Rebuild the map from the current UI selections.

    Loads the default data set when no path data is present, falls back to
    the most recently uploaded shapefile when ``current_shp_path`` is
    missing, then computes the routes and renders them.

    Args:
        map_location, map_type, path_type, brightness_slider: Passed to
            ``plot_paths_folium``.
        n_samples, company, sample_checkbox, path_checkbox: Passed to
            ``cal_paths_folium``.
        date_start, date_end: Route date bounds; ignored when
            *pick_all_date* is set or the value is blank.
        cluster_num_samples, cluster_company_search, cluster_date_start,
            cluster_date_end: Cluster filters for ``plot_paths_folium``.
        pick_all_date: Disable the route date filter.

    Returns:
        ``(log_text, map_html)``.  ``map_html`` is ``None`` when no data is
        available or an error occurred; the log then carries the traceback.
    """
    global df, cluster_df, regions_gdf, current_shp_path
    import traceback

    log_messages = []
    log_messages.append(f"Updating map with settings: Location={map_location}, Type={map_type}, Path={path_type}")

    # Check if data is loaded
    if df is None:
        log_messages.append("Loading default data because df is None")
        # load_default_data() returns seven values; only the loaded frame
        # and the status text are needed here.
        df_loaded, _, _, msg, *_ = load_default_data()
        if msg:
            log_messages.append(msg)
        if df_loaded is None:
            return "No data available. Please upload a CSV file.", None
    else:
        log_messages.append(f"Using existing df with {len(df)} rows")

    try:
        # Process date filters
        start_d = None
        end_d = None

        if not pick_all_date:
            if date_start and date_start.strip():
                start_d = date_start
                log_messages.append(f"Using start date: {start_d}")
            if date_end and date_end.strip():
                end_d = date_end
                log_messages.append(f"Using end date: {end_d}")
        else:
            log_messages.append("Using all dates")

        # Check if shapefile exists at current_shp_path
        if not os.path.exists(current_shp_path):
            log_messages.append(f"Warning: Shapefile not found at {current_shp_path}")
            # Try to find the most recently uploaded shapefile
            permanent_dir = os.path.join('data', 'uploaded_shapefiles')
            if os.path.exists(permanent_dir):
                subdirs = [os.path.join(permanent_dir, d) for d in os.listdir(permanent_dir)
                           if os.path.isdir(os.path.join(permanent_dir, d))]
                if subdirs:
                    # Get the most recent directory
                    latest_dir = max(subdirs, key=os.path.getctime)
                    # Find .shp file in that directory
                    shp_files = [f for f in os.listdir(latest_dir) if f.endswith('.shp')]
                    if shp_files:
                        current_shp_path = os.path.join(latest_dir, shp_files[0])
                        log_messages.append(f"Using most recent shapefile: {current_shp_path}")

        # Calculate routes with full error reporting
        try:
            routes, cal_logs = cal_paths_folium(df, current_shp_path, n_samples=n_samples,
                                                start_d=start_d, end_d=end_d,
                                                company=company, sample_checkbox=sample_checkbox,
                                                path_checkbox=path_checkbox)
            log_messages.append(cal_logs)
        except Exception as e:
            log_messages.append(f"Error in route calculation: {str(e)}")
            log_messages.append(traceback.format_exc())
            return "\n".join(log_messages), None

        # Check if we have routes to display
        if not routes:
            log_messages.append("No routes to display after applying filters.")
            empty_map = folium.Map(location=(36.5, 127.5), zoom_start=7)
            return "\n".join(log_messages), empty_map._repr_html_()

        # Create map
        html_output = plot_paths_folium(routes, cluster_df, cluster_num_samples, cluster_company_search,
                                        cluster_date_start, cluster_date_end, map_location, map_type,
                                        path_type, brightness_slider)

        return "\n".join(log_messages), html_output

    except Exception as e:
        error_msg = f"Error updating map: {str(e)}"
        log_messages.append(error_msg)
        log_messages.append(traceback.format_exc())
        return "\n".join(log_messages), None
773
+
774
+ # Initialize data
775
+
776
+
777
def _read_csv_any_encoding(path, encodings=('utf-8', 'cp949', 'euc-kr')):
    """Read *path* with the first of *encodings* that decodes it."""
    for encoding in encodings:
        try:
            return pd.read_csv(path, engine='python', encoding=encoding)
        except UnicodeDecodeError:
            continue
    raise ValueError("could not decode the file with any of the supported encodings")


def load_default_data():
    """Load the bundled path CSV, cluster CSV and region shapefile.

    Everything is read into locals first.  The module-level ``df``,
    ``cluster_df`` and ``regions_gdf`` are replaced only when all three
    loaded, so the globals always agree with the returned values.

    Returns:
        A 7-tuple ``(df, cluster_df, regions_gdf, messages, path_filename,
        cluster_filename, shp_filename)``.  On any failure the first three
        are ``None``, the filenames are empty strings, and ``messages``
        names the file that failed.
    """
    global df, cluster_df, regions_gdf
    default_file = 'data/20250122_Order_List_202411_12_CJW.csv'
    default_cluster_file = 'data/path_clustering_2024.csv'
    default_gadm_shp_file = 'data/gadm41_KOR_shp/gadm41_KOR_3.shp'

    messages = []

    # Main path file
    try:
        loaded_df = _read_csv_any_encoding(default_file)
    except Exception as e:
        messages.append(f"Error loading path file: {str(e)}")
        return None, None, None, "\n".join(messages), "", "", ""
    path_filename = os.path.basename(default_file)
    messages.append(f"Path file loaded successfully: {path_filename}")

    # Cluster file
    try:
        loaded_cluster_df = _read_csv_any_encoding(default_cluster_file)
    except Exception as e:
        messages.append(f"Error loading cluster file: {str(e)}")
        return None, None, None, "\n".join(messages), "", "", ""
    cluster_filename = os.path.basename(default_cluster_file)
    messages.append(f"Cluster file loaded successfully: {cluster_filename}")

    # Load shapefile
    try:
        loaded_regions = gpd.read_file(default_gadm_shp_file).to_crs("EPSG:4326")
    except Exception as e:
        messages.append(f"Error loading shapefile: {str(e)}")
        return None, None, None, "\n".join(messages), "", "", ""
    shp_filename = os.path.basename(default_gadm_shp_file)
    messages.append(f"Shapefile loaded successfully: {shp_filename}")

    # Commit: all three inputs are available.
    df, cluster_df, regions_gdf = loaded_df, loaded_cluster_df, loaded_regions
    return df, cluster_df, regions_gdf, "\n".join(messages), path_filename, cluster_filename, shp_filename
825
+
826
# Initial filter values used to build the map shown at start-up.
init_n_samples = 20
init_path_company_search = "백년화편"
init_path_date_start = "2024-12-01"
init_path_date_end = "2024-12-31"
init_cluster_num_samples = 200
init_cluster_date_start = "2025-02-24"
init_cluster_date_end = "2025-02-24"
init_brightness = 50


init_df, init_cluster_df, init_regions_gdf, init_msg, init_path_file, init_cluster_file, init_shp_file = load_default_data()


# Initial map.  It is built only when both the path data and the cluster
# data loaded: gating on the load result, not the module-level df, keeps a
# partially failed load from crashing the module at import time.
init_shp_file_path = 'data/gadm41_KOR_shp/gadm41_KOR_3.shp'
if init_df is not None and init_cluster_df is not None:
    init_routes, _ = cal_paths_folium(init_df, init_shp_file_path, n_samples=init_n_samples,
                                      start_d=init_path_date_start, end_d=init_path_date_end,
                                      company=init_path_company_search)
else:
    init_routes = []

init_html = plot_paths_folium(
    routes=init_routes,
    cluster_df=init_cluster_df,
    cluster_num_samples=init_cluster_num_samples,
    cluster_date_start=init_cluster_date_start,
    cluster_date_end=init_cluster_date_end,
    brightness=init_brightness,
) if init_routes else None
845
+
846
# Create Gradio interface
# NOTE(review): the container nesting below (which widgets sit in which
# Row/Column) is reconstructed from statement order — confirm against the
# intended layout.
with gr.Blocks() as demo:
    # Layout
    with gr.Column():
        # Map controls
        with gr.Row():
            map_location = gr.Radio(
                ["Korea", "Seoul", "Busan"],
                label="Map Location Shortcuts",
                value="Seoul",
            )
            map_type = gr.Radio(
                ["Normal map", "Satellite map"],
                label="Map Type",
                value="Satellite map",
            )
            path_type = gr.Radio(
                ["point", "line", "point+line"],
                label="Path Type",
                value="point+line",
            )
            brightness_slider = gr.Slider(
                minimum=1,
                maximum=300,
                # Same value the pre-rendered initial map was drawn with.
                value=init_brightness,
                step=1,
                label="Map Brightness (%)",
            )

        # Map display
        map_html = gr.HTML(init_html, elem_classes=["map-container"])

        generate_btn = gr.Button("Generate Map")

        # Filter controls
        with gr.Column():
            with gr.Row():
                path_file_upload = gr.File(label="Upload Path File", height=89, file_count="single", scale=1)
                path_current_file = gr.Textbox(label="Current Path File", value=init_path_file, scale=4)
            with gr.Row():
                cluster_file_upload = gr.File(label="Upload Cluster File", height=89, file_count="single", scale=1)
                cluster_current_file = gr.Textbox(label="Current Cluster File", value=init_cluster_file, scale=4)
            with gr.Row():
                gadm_shp_upload = gr.File(label="Upload gadm .zip File", height=89, file_count="single", scale=1)
                gadm_shp_current_file = gr.Textbox(label="Current gadm .zip File", value=init_shp_file, scale=4)
            # Widget defaults reuse the init_* constants so the controls always
            # describe the map that was pre-rendered at start-up.
            with gr.Row():
                with gr.Row():
                    path_num_samples = gr.Number(label="Path Sample Count", precision=0, value=init_n_samples, scale=1, minimum=1, maximum=200)
                    path_company_search = gr.Textbox(label="Path Company Search", value=init_path_company_search, scale=4)
                with gr.Row():
                    cluster_num_samples = gr.Number(label="Cluster Sample Count", precision=0, value=init_cluster_num_samples, scale=1, minimum=1, maximum=200)
                    cluster_company_search = gr.Textbox(label="Cluster Company Search", scale=4)
            # Date range
            with gr.Row():
                with gr.Row():
                    path_date_start = gr.Textbox(label="Path Start Date", placeholder="YYYY-MM-DD", value=init_path_date_start)
                    path_date_end = gr.Textbox(label="Path End Date", placeholder="YYYY-MM-DD", value=init_path_date_end)
                with gr.Row():
                    cluster_date_start = gr.Textbox(label="Cluster Start Date", placeholder="YYYY-MM-DD", value=init_cluster_date_start)
                    cluster_date_end = gr.Textbox(label="Cluster End Date", placeholder="YYYY-MM-DD", value=init_cluster_date_end)

            # Checkboxes
            with gr.Row():
                pick_all_date = gr.Checkbox(label="Select All Dates")
                sample_checkbox = gr.Checkbox(label="Print Sample", value=True)
                path_checkbox = gr.Checkbox(label="Print Path")

        # Console
        console = gr.Textbox(
            label="Console",
            lines=10,
            max_lines=100,
            interactive=False,
            value=init_msg,
            elem_classes=["console"],
        )

    # Style
    # The markup is assembled flush-left so the <style> tag starts at column 0
    # of the Markdown source instead of depending on this file's indentation.
    gr.Markdown(
        "\n"
        "<style>\n"
        ".map-container {\n"
        "margin: 10px;\n"
        "width: calc(100% - 20px);\n"
        "height: 600px;\n"
        "}\n"
        ".console {\n"
        "background-color: black;\n"
        "color: white;\n"
        "font-family: monospace;\n"
        "overflow-y: scroll;\n"
        "}\n"
        "</style>\n"
    )

    # Event handlers
    # Each upload handler receives the uploaded file and writes to the console
    # plus the matching "current file" textbox.
    path_file_upload.upload(
        fn=process_upload,
        inputs=[path_file_upload],
        outputs=[console, path_current_file],
    )
    cluster_file_upload.upload(
        fn=process_cluster_upload,
        inputs=[cluster_file_upload],
        outputs=[console, cluster_current_file],
    )
    gadm_shp_upload.upload(
        fn=process_shp_upload,
        inputs=[gadm_shp_upload],
        outputs=[console, gadm_shp_current_file],
    )

    # Single definition of the update_map wiring; the order of map_inputs is
    # the positional argument order passed to update_map.
    map_inputs = [
        map_location, map_type, path_type, path_num_samples, path_company_search,
        path_date_start, path_date_end, cluster_num_samples, cluster_company_search,
        cluster_date_start, cluster_date_end, pick_all_date, sample_checkbox, path_checkbox,
        brightness_slider,
    ]
    map_outputs = [console, map_html]

    generate_btn.click(fn=update_map, inputs=map_inputs, outputs=map_outputs)

    # Re-render automatically when a display control (the three radios or the
    # brightness slider) changes; other filters apply on "Generate Map".
    for control in [map_location, map_type, path_type, brightness_slider]:
        control.change(fn=update_map, inputs=map_inputs, outputs=map_outputs)

# Launch the app
demo.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ pandas
2
+ numpy
3
+ folium
4
+ gradio
5
+ geopandas
6
+ shapely
7
+ git-lfs