File size: 1,222 Bytes
5a8534e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import pandas as pd
from geopy.distance import geodesic


# Function to calculate distances while preserving all original columns
def calculate_distances(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    code_col1,
    lat_col1,
    long_col1,
    code_col2,
    lat_col2,
    long_col2,
):
    """Compute the geodesic distance between every row of df1 and every row of df2.

    Each output row holds all columns of the df1 row under their original
    names, all columns of the df2 row under ``"<name>_Dataset2"``, and the
    distance in kilometres under ``"Distance_km"``.

    Args:
        df1: First dataset; one output row is produced per (df1 row, df2 row).
        df2: Second dataset.
        code_col1: Column of df1 used to group rows when picking the closest
            df2 row.
        lat_col1: Latitude column of df1.
        long_col1: Longitude column of df1.
        code_col2: Accepted for interface symmetry; not read by this function.
        lat_col2: Latitude column of df2.
        long_col2: Longitude column of df2.

    Returns:
        A ``(df_distances, df_closest)`` tuple. ``df_distances`` contains every
        pairing; ``df_closest`` contains, for each distinct ``code_col1``
        value, the pairing with the smallest ``Distance_km``. When either
        input has no rows, both frames are empty but carry the expected
        column names.

    Raises:
        KeyError: If a latitude/longitude column is missing from a non-empty
            input, or ``code_col1`` is missing from the combined rows.
    """
    # to_dict("records") keeps each column's own type; iterrows() would build
    # one Series per row and upcast mixed numeric columns (e.g. int codes
    # turning into floats).
    records1 = df1.to_dict("records")

    # Everything derived from df2 alone is loop-invariant, so build it once.
    prepared2 = [
        (
            (rec2[lat_col2], rec2[long_col2]),
            {f"{col}_Dataset2": value for col, value in rec2.items()},
        )
        for rec2 in df2.to_dict("records")
    ]

    distances = []
    for rec1 in records1:
        coord1 = (rec1[lat_col1], rec1[long_col1])
        for coord2, suffixed2 in prepared2:
            distance_km = geodesic(coord1, coord2).kilometers  # Compute distance

            # Combine all original columns + distance
            distances.append({**rec1, **suffixed2, "Distance_km": distance_km})

    if not distances:
        # No pairings: a column-less frame would make the groupby below fail
        # with KeyError, so return empty frames with the expected layout.
        # dict.fromkeys de-duplicates names in the same first-seen order that
        # the dict merge above would produce.
        expected_columns = list(
            dict.fromkeys(
                [
                    *df1.columns,
                    *(f"{col}_Dataset2" for col in df2.columns),
                    "Distance_km",
                ]
            )
        )
        empty = pd.DataFrame(columns=expected_columns)
        return empty, empty.copy()

    df_distances = pd.DataFrame(distances)

    # Find the closest point for each Point1
    df_closest: pd.DataFrame = df_distances.loc[
        df_distances.groupby(code_col1)["Distance_km"].idxmin()
    ]

    return df_distances, df_closest