|
import pandas as pd |
|
import streamlit as st |
|
from geopy.distance import geodesic |
|
|
|
|
|
|
|
def calculate_distances( |
|
df1: pd.DataFrame, |
|
df2: pd.DataFrame, |
|
code_col1, |
|
lat_col1, |
|
long_col1, |
|
code_col2, |
|
lat_col2, |
|
long_col2, |
|
): |
|
distances = [] |
|
|
|
for _, row1 in df1.iterrows(): |
|
for _, row2 in df2.iterrows(): |
|
coord1 = (row1[lat_col1], row1[long_col1]) |
|
coord2 = (row2[lat_col2], row2[long_col2]) |
|
distance_km = geodesic(coord1, coord2).kilometers |
|
|
|
|
|
combined_row = { |
|
**row1.to_dict(), |
|
**{ |
|
f"{col}_Dataset2": row2[col] for col in df2.columns |
|
}, |
|
"Distance_km": distance_km, |
|
} |
|
distances.append(combined_row) |
|
|
|
df_distances = pd.DataFrame(distances) |
|
|
|
|
|
df_closest: pd.DataFrame = df_distances.loc[ |
|
df_distances.groupby(code_col1)["Distance_km"].idxmin() |
|
] |
|
|
|
return df_distances, df_closest |
|
|
|
|
|
|
|
st.title("Multi Points Distance Calculator") |
|
|
|
st.write( |
|
"""This app allows you to calculate the distance between multiple points in 2 datasets. |
|
Please choose a file containing the latitude and longitude columns for each dataset. |
|
""" |
|
) |
|
|
|
dataset1_sample_file_path = "samples/Dataset1.xlsx" |
|
dataset2_sample_file_path = "samples/Dataset2.xlsx" |
|
|
|
col1, col2, col3 = st.columns(3) |
|
|
|
with col1: |
|
st.download_button( |
|
label="Dataset1 Sample File", |
|
data=open(dataset1_sample_file_path, "rb").read(), |
|
file_name="Dataset1.xlsx", |
|
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", |
|
) |
|
with col2: |
|
st.download_button( |
|
label="Dataset2 Sample File", |
|
data=open(dataset2_sample_file_path, "rb").read(), |
|
file_name="Dataset2.xlsx", |
|
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", |
|
) |
|
|
|
|
|
|
|
st.subheader("Upload Dataset 1 (Reference Points)") |
|
file1 = st.file_uploader("Upload first dataset (Excel)", type=["xlsx"], key="file1") |
|
|
|
|
|
st.subheader("Upload Dataset 2 (Comparison Points)") |
|
file2 = st.file_uploader("Upload second dataset (Excel)", type=["xlsx"], key="file2") |
|
|
|
if file1 and file2: |
|
try: |
|
|
|
df1 = pd.read_excel(file1) |
|
df2 = pd.read_excel(file2) |
|
|
|
st.subheader("Select Columns for Dataset 1") |
|
code_col1 = st.selectbox("Select 'CODE' Column", df1.columns, key="code1") |
|
lat_col1 = st.selectbox("Select 'Latitude' Column", df1.columns, key="lat1") |
|
long_col1 = st.selectbox("Select 'Longitude' Column", df1.columns, key="long1") |
|
|
|
st.subheader("Select Columns for Dataset 2") |
|
code_col2 = st.selectbox("Select 'CODE' Column", df2.columns, key="code2") |
|
lat_col2 = st.selectbox("Select 'Latitude' Column", df2.columns, key="lat2") |
|
long_col2 = st.selectbox("Select 'Longitude' Column", df2.columns, key="long2") |
|
|
|
|
|
if st.button("Calculate Distances"): |
|
df_distances, df_closest = calculate_distances( |
|
df1, df2, code_col1, lat_col1, long_col1, code_col2, lat_col2, long_col2 |
|
) |
|
|
|
|
|
st.subheader("All Distances") |
|
st.dataframe(df_distances) |
|
|
|
|
|
st.subheader("Closest Matches") |
|
st.dataframe(df_closest) |
|
|
|
|
|
st.download_button( |
|
label="Download Closest Matches as CSV", |
|
data=df_closest.to_csv(index=False), |
|
file_name="closest_matches.csv", |
|
mime="text/csv", |
|
) |
|
|
|
except Exception as e: |
|
st.error(f"Error processing files: {e}") |
|
|