import pandas as pd | |
from geopy.distance import geodesic | |
# Compute pairwise geodesic distances between two datasets while preserving all original columns
def calculate_distances(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    code_col1,
    lat_col1,
    long_col1,
    code_col2,
    lat_col2,
    long_col2,
):
    """Compute the geodesic distance between every row of df1 and every row of df2.

    Each output row holds all columns of one ``df1`` row, all columns of one
    ``df2`` row (renamed with a ``_Dataset2`` suffix) and a ``Distance_km``
    column with the distance between the two rows' coordinates in kilometres.

    Args:
        df1: First dataset; its columns keep their original names.
        df2: Second dataset; its columns are suffixed with ``_Dataset2``.
        code_col1: Column of ``df1`` used to group rows when selecting the
            closest match.
        lat_col1: Latitude column of ``df1``.
        long_col1: Longitude column of ``df1``.
        code_col2: Accepted for symmetry with ``code_col1``; not used by the
            computation.
        lat_col2: Latitude column of ``df2``.
        long_col2: Longitude column of ``df2``.

    Returns:
        A tuple ``(df_distances, df_closest)``. ``df_distances`` contains one
        row per (df1 row, df2 row) pair. ``df_closest`` contains, for each
        distinct value of ``code_col1``, the row of ``df_distances`` with the
        smallest ``Distance_km``. When either input has no rows, both results
        are empty frames that still carry the expected columns.
    """
    suffixed_cols2 = [f"{col}_Dataset2" for col in df2.columns]

    # The df2 side of every pair is identical for each df1 row, so build the
    # coordinate tuple and the suffixed column mapping once up front.
    targets = [
        (
            (row2[lat_col2], row2[long_col2]),
            {f"{col}_Dataset2": row2[col] for col in df2.columns},
        )
        for _, row2 in df2.iterrows()
    ]

    distances = []
    for _, row1 in df1.iterrows():
        coord1 = (row1[lat_col1], row1[long_col1])
        row1_values = row1.to_dict()  # Keep all columns from Dataset1
        for coord2, row2_values in targets:
            distances.append(
                {
                    **row1_values,
                    **row2_values,  # Keep all columns from Dataset2
                    "Distance_km": geodesic(coord1, coord2).kilometers,
                }
            )

    if not distances:
        # No pairs: pd.DataFrame([]) would have no columns and the groupby
        # below would raise KeyError. Return empty frames with the same column
        # layout a non-empty result would have (dict.fromkeys mirrors the
        # first-position-wins key order of the row dicts).
        columns = list(
            dict.fromkeys([*df1.columns, *suffixed_cols2, "Distance_km"])
        )
        return pd.DataFrame(columns=columns), pd.DataFrame(columns=columns)

    df_distances = pd.DataFrame(distances)

    # Find the closest point for each Point1
    closest_idx = df_distances.groupby(code_col1)["Distance_km"].idxmin()
    df_closest: pd.DataFrame = df_distances.loc[closest_idx]
    return df_distances, df_closest