import pandas as pd
from geopy.distance import geodesic


def calculate_distances(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    code_col1,
    lat_col1,
    long_col1,
    code_col2,
    lat_col2,
    long_col2,
):
    """Compute the geodesic distance between every row of df1 and every row of df2.

    Each output row holds all columns of the df1 row, all columns of the df2
    row (renamed to ``"<column>_Dataset2"``), and ``"Distance_km"``, the
    geodesic distance in kilometres between the two coordinate pairs.

    Args:
        df1: First dataset; one group of output rows is produced per row.
        df2: Second dataset; every row is paired with every row of ``df1``.
        code_col1: Column of ``df1`` used to group rows when selecting the
            closest match.
        lat_col1: Column of ``df1`` holding the latitude.
        long_col1: Column of ``df1`` holding the longitude.
        code_col2: Accepted for interface compatibility; not used by the
            computation.
        lat_col2: Column of ``df2`` holding the latitude.
        long_col2: Column of ``df2`` holding the longitude.

    Returns:
        A tuple ``(df_distances, df_closest)``. ``df_distances`` contains one
        row per (df1 row, df2 row) pair. ``df_closest`` contains, for each
        distinct value of ``code_col1``, the row of ``df_distances`` with the
        smallest ``"Distance_km"``. If either input has no rows, both frames
        are empty but still carry the expected columns.
    """
    # Build the df2 side of every pair once instead of once per df1 row.
    # iterrows() is kept so the stored values match what a row-wise access
    # yields (including any dtype upcasting it performs).
    targets = [
        (
            (row2[lat_col2], row2[long_col2]),
            {f"{col}_Dataset2": row2[col] for col in df2.columns},
        )
        for _, row2 in df2.iterrows()
    ]

    distances = []
    for _, row1 in df1.iterrows():
        # Invariant across the inner loop: hoisted out of it.
        coord1 = (row1[lat_col1], row1[long_col1])
        base = row1.to_dict()
        for coord2, suffixed in targets:
            distances.append(
                {
                    **base,  # Keep all columns from Dataset1
                    **suffixed,  # Keep all columns from Dataset2
                    "Distance_km": geodesic(coord1, coord2).kilometers,
                }
            )

    if not distances:
        # No pairs: a DataFrame built from an empty list has no columns, so
        # grouping by code_col1 would raise KeyError. Return empty frames
        # with the columns a non-empty result would have.
        columns = list(
            dict.fromkeys(
                [
                    *df1.columns,
                    *(f"{col}_Dataset2" for col in df2.columns),
                    "Distance_km",
                ]
            )
        )
        empty = pd.DataFrame(columns=columns)
        return empty, empty.copy()

    df_distances = pd.DataFrame(distances)

    # Find the closest point for each Point1
    closest_index = df_distances.groupby(code_col1)["Distance_km"].idxmin()
    df_closest: pd.DataFrame = df_distances.loc[closest_index]

    return df_distances, df_closest