import pandas as pd import streamlit as st from geopy.distance import geodesic # Function to calculate distances while preserving all original columns def calculate_distances( df1: pd.DataFrame, df2: pd.DataFrame, code_col1, lat_col1, long_col1, code_col2, lat_col2, long_col2, ): distances = [] for _, row1 in df1.iterrows(): for _, row2 in df2.iterrows(): coord1 = (row1[lat_col1], row1[long_col1]) coord2 = (row2[lat_col2], row2[long_col2]) distance_km = geodesic(coord1, coord2).kilometers # Compute distance # Combine all original columns + distance combined_row = { **row1.to_dict(), # Keep all columns from Dataset1 **{ f"{col}_Dataset2": row2[col] for col in df2.columns }, # Keep all columns from Dataset2 "Distance_km": distance_km, } distances.append(combined_row) df_distances = pd.DataFrame(distances) # Find the closest point for each Point1 df_closest: pd.DataFrame = df_distances.loc[ df_distances.groupby(code_col1)["Distance_km"].idxmin() ] return df_distances, df_closest # Streamlit UI st.title("Multi Points Distance Calculator") st.write( """This app allows you to calculate the distance between multiple points in 2 datasets. Please choose a file containing the latitude and longitude columns for each dataset. """ ) dataset1_sample_file_path = "samples/Dataset1.xlsx" dataset2_sample_file_path = "samples/Dataset2.xlsx" col1, col2, col3 = st.columns(3) with col1: st.download_button( label="Dataset1 Sample File", data=open(dataset1_sample_file_path, "rb").read(), file_name="Dataset1.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ) with col2: st.download_button( label="Dataset2 Sample File", data=open(dataset2_sample_file_path, "rb").read(), file_name="Dataset2.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ) # Upload Dataset 1 st.subheader("Upload Dataset 1 (Reference Points)") file1 = st.file_uploader("Upload first dataset (Excel)", type=["xlsx"], key="file1") # Upload Dataset 2 st.subheader("Upload Dataset 2 (Comparison Points)") file2 = st.file_uploader("Upload second dataset (Excel)", type=["xlsx"], key="file2") if file1 and file2: try: # Read the datasets df1 = pd.read_excel(file1) df2 = pd.read_excel(file2) st.subheader("Select Columns for Dataset 1") code_col1 = st.selectbox("Select 'CODE' Column", df1.columns, key="code1") lat_col1 = st.selectbox("Select 'Latitude' Column", df1.columns, key="lat1") long_col1 = st.selectbox("Select 'Longitude' Column", df1.columns, key="long1") st.subheader("Select Columns for Dataset 2") code_col2 = st.selectbox("Select 'CODE' Column", df2.columns, key="code2") lat_col2 = st.selectbox("Select 'Latitude' Column", df2.columns, key="lat2") long_col2 = st.selectbox("Select 'Longitude' Column", df2.columns, key="long2") # Calculate distances when button is clicked if st.button("Calculate Distances"): df_distances, df_closest = calculate_distances( df1, df2, code_col1, lat_col1, long_col1, code_col2, lat_col2, long_col2 ) # Display all distances st.subheader("All Distances") st.dataframe(df_distances) # Display closest points st.subheader("Closest Matches") st.dataframe(df_closest) # Downloadable CSV st.download_button( label="Download Closest Matches as CSV", data=df_closest.to_csv(index=False), file_name="closest_matches.csv", mime="text/csv", ) except Exception as e: st.error(f"Error processing files: {e}")