File size: 4,088 Bytes
f3e5a17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb325dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f3e5a17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import pandas as pd
import streamlit as st
from geopy.distance import geodesic


# Function to calculate distances while preserving all original columns
def calculate_distances(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    code_col1,
    lat_col1,
    long_col1,
    code_col2,
    lat_col2,
    long_col2,
):
    distances = []

    for _, row1 in df1.iterrows():
        for _, row2 in df2.iterrows():
            coord1 = (row1[lat_col1], row1[long_col1])
            coord2 = (row2[lat_col2], row2[long_col2])
            distance_km = geodesic(coord1, coord2).kilometers  # Compute distance

            # Combine all original columns + distance
            combined_row = {
                **row1.to_dict(),  # Keep all columns from Dataset1
                **{
                    f"{col}_Dataset2": row2[col] for col in df2.columns
                },  # Keep all columns from Dataset2
                "Distance_km": distance_km,
            }
            distances.append(combined_row)

    df_distances = pd.DataFrame(distances)

    # Find the closest point for each Point1
    df_closest: pd.DataFrame = df_distances.loc[
        df_distances.groupby(code_col1)["Distance_km"].idxmin()
    ]

    return df_distances, df_closest


# Streamlit UI
st.title("Multi Points Distance Calculator")

st.write(
    """This app allows you to calculate the distance between multiple points in 2 datasets.
    Please choose a file containing the latitude and longitude columns for each dataset.
                      """
)

dataset1_sample_file_path = "samples/Dataset1.xlsx"
dataset2_sample_file_path = "samples/Dataset2.xlsx"

col1, col2, col3 = st.columns(3)

with col1:
    st.download_button(
        label="Dataset1 Sample File",
        data=open(dataset1_sample_file_path, "rb").read(),
        file_name="Dataset1.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )
with col2:
    st.download_button(
        label="Dataset2 Sample File",
        data=open(dataset2_sample_file_path, "rb").read(),
        file_name="Dataset2.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )


# Upload Dataset 1
st.subheader("Upload Dataset 1 (Reference Points)")
file1 = st.file_uploader("Upload first dataset (Excel)", type=["xlsx"], key="file1")

# Upload Dataset 2
st.subheader("Upload Dataset 2 (Comparison Points)")
file2 = st.file_uploader("Upload second dataset (Excel)", type=["xlsx"], key="file2")

if file1 and file2:
    try:
        # Read the datasets
        df1 = pd.read_excel(file1)
        df2 = pd.read_excel(file2)

        st.subheader("Select Columns for Dataset 1")
        code_col1 = st.selectbox("Select 'CODE' Column", df1.columns, key="code1")
        lat_col1 = st.selectbox("Select 'Latitude' Column", df1.columns, key="lat1")
        long_col1 = st.selectbox("Select 'Longitude' Column", df1.columns, key="long1")

        st.subheader("Select Columns for Dataset 2")
        code_col2 = st.selectbox("Select 'CODE' Column", df2.columns, key="code2")
        lat_col2 = st.selectbox("Select 'Latitude' Column", df2.columns, key="lat2")
        long_col2 = st.selectbox("Select 'Longitude' Column", df2.columns, key="long2")

        # Calculate distances when button is clicked
        if st.button("Calculate Distances"):
            df_distances, df_closest = calculate_distances(
                df1, df2, code_col1, lat_col1, long_col1, code_col2, lat_col2, long_col2
            )

            # Display all distances
            st.subheader("All Distances")
            st.dataframe(df_distances)

            # Display closest points
            st.subheader("Closest Matches")
            st.dataframe(df_closest)

            # Downloadable CSV
            st.download_button(
                label="Download Closest Matches as CSV",
                data=df_closest.to_csv(index=False),
                file_name="closest_matches.csv",
                mime="text/csv",
            )

    except Exception as e:
        st.error(f"Error processing files: {e}")