db_query / apps /multi_points_distance_calculator.py
DavMelchi's picture
Adding samples files to multi point dist cal
cb325dd
raw
history blame
4.09 kB
import pandas as pd
import streamlit as st
from geopy.distance import geodesic
# Function to calculate distances while preserving all original columns
def calculate_distances(
df1: pd.DataFrame,
df2: pd.DataFrame,
code_col1,
lat_col1,
long_col1,
code_col2,
lat_col2,
long_col2,
):
distances = []
for _, row1 in df1.iterrows():
for _, row2 in df2.iterrows():
coord1 = (row1[lat_col1], row1[long_col1])
coord2 = (row2[lat_col2], row2[long_col2])
distance_km = geodesic(coord1, coord2).kilometers # Compute distance
# Combine all original columns + distance
combined_row = {
**row1.to_dict(), # Keep all columns from Dataset1
**{
f"{col}_Dataset2": row2[col] for col in df2.columns
}, # Keep all columns from Dataset2
"Distance_km": distance_km,
}
distances.append(combined_row)
df_distances = pd.DataFrame(distances)
# Find the closest point for each Point1
df_closest: pd.DataFrame = df_distances.loc[
df_distances.groupby(code_col1)["Distance_km"].idxmin()
]
return df_distances, df_closest
# Streamlit UI
st.title("Multi Points Distance Calculator")
st.write(
"""This app allows you to calculate the distance between multiple points in 2 datasets.
Please choose a file containing the latitude and longitude columns for each dataset.
"""
)
dataset1_sample_file_path = "samples/Dataset1.xlsx"
dataset2_sample_file_path = "samples/Dataset2.xlsx"
col1, col2, col3 = st.columns(3)
with col1:
st.download_button(
label="Dataset1 Sample File",
data=open(dataset1_sample_file_path, "rb").read(),
file_name="Dataset1.xlsx",
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
)
with col2:
st.download_button(
label="Dataset2 Sample File",
data=open(dataset2_sample_file_path, "rb").read(),
file_name="Dataset2.xlsx",
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
)
# Upload Dataset 1
st.subheader("Upload Dataset 1 (Reference Points)")
file1 = st.file_uploader("Upload first dataset (Excel)", type=["xlsx"], key="file1")
# Upload Dataset 2
st.subheader("Upload Dataset 2 (Comparison Points)")
file2 = st.file_uploader("Upload second dataset (Excel)", type=["xlsx"], key="file2")
if file1 and file2:
try:
# Read the datasets
df1 = pd.read_excel(file1)
df2 = pd.read_excel(file2)
st.subheader("Select Columns for Dataset 1")
code_col1 = st.selectbox("Select 'CODE' Column", df1.columns, key="code1")
lat_col1 = st.selectbox("Select 'Latitude' Column", df1.columns, key="lat1")
long_col1 = st.selectbox("Select 'Longitude' Column", df1.columns, key="long1")
st.subheader("Select Columns for Dataset 2")
code_col2 = st.selectbox("Select 'CODE' Column", df2.columns, key="code2")
lat_col2 = st.selectbox("Select 'Latitude' Column", df2.columns, key="lat2")
long_col2 = st.selectbox("Select 'Longitude' Column", df2.columns, key="long2")
# Calculate distances when button is clicked
if st.button("Calculate Distances"):
df_distances, df_closest = calculate_distances(
df1, df2, code_col1, lat_col1, long_col1, code_col2, lat_col2, long_col2
)
# Display all distances
st.subheader("All Distances")
st.dataframe(df_distances)
# Display closest points
st.subheader("Closest Matches")
st.dataframe(df_closest)
# Downloadable CSV
st.download_button(
label="Download Closest Matches as CSV",
data=df_closest.to_csv(index=False),
file_name="closest_matches.csv",
mime="text/csv",
)
except Exception as e:
st.error(f"Error processing files: {e}")