import streamlit as st import pandas as pd from joblib import Parallel, delayed from redfin import Redfin import requests requests.urllib3.disable_warnings() @st.cache_data def convert_df(df): return df.to_csv() def red_fin_api(add): client = Redfin() response = client.search(add) try: url = response['payload']['exactMatch']['url'] initial_info = client.initial_info(url) except: initial_info = add try: property_id = initial_info['payload']['propertyId'] mls_data = client.below_the_fold(property_id) except: mls_data = add try: lat,lon=initial_info['payload']['latLong'].values() img=initial_info['payload']['preloadImageUrls'][0] # int_group=r[1]['payload']['amenitiesInfo']['superGroups'][0]['amenityGroups'] ext_prop=mls_data['payload']['amenitiesInfo']['superGroups'][1]['amenityGroups'][0]['amenityEntries'] ext_prop=pd.DataFrame(ext_prop) ext_prop['amenityValues']=[i[0] for i in ext_prop['amenityValues'].values] ext_prop2=ext_prop[['referenceName','amenityValues']].T ext_prop2.columns=ext_prop2.values[0] ext_prop3=ext_prop2.tail(1).reset_index(drop=1) df=pd.DataFrame(mls_data['payload']['publicRecordsInfo']['basicInfo'],index=[0]).drop(columns=['apn','propertyLastUpdatedDate','displayTimeZone']) df['Lat']=lat df['Lon']=lon # df['Image']=img df2=df.join(ext_prop3) df2.insert(0,'url',f'https://www.redfin.com{url}') except: df2=pd.DataFrame({'Missing':[1]}) df2.insert(0,'Address Input',add) return df2 def catch_errors(addresses): try: return red_fin_api(addresses) except: return pd.DataFrame({'Address Input':[addresses]}) @st.cache_data def process_multiple_address(addresses): results=Parallel(n_jobs=64, prefer="threads")(delayed(catch_errors)(i) for i in addresses) return results st.set_page_config(layout="wide") st.header("Redfin Data") address = st.sidebar.text_input("Single Address:", "190 Pebble Creek Dr Etna, OH 43062") uploaded_file = st.sidebar.file_uploader("Upload Multiple Addresses:") if uploaded_file is not None: try: df = pd.read_csv(uploaded_file) except: df = pd.read_excel(uploaded_file) address_cols=list(df.columns[:4]) df[address_cols[-1]]=df[address_cols[-1]].astype(str).str[:5].astype(int).astype(str) df[address_cols[-1]]=df[address_cols[-1]].apply(lambda x: x.zfill(5)) df['Address All']=df[address_cols[0]]+', '+df[address_cols[1]]+', '+df[address_cols[2]]+' '+df[address_cols[3]] results= process_multiple_address(df['Address All'].values) results=pd.concat(results).reset_index(drop=1) # results.index=results.index+1 else: results=red_fin_api(address).reset_index(drop=1) # results.index=results.index+1 total_input_shape=results.shape[0] cols_order=['Address Input', 'sqFtFinished', 'totalSqFt', 'yearBuilt', 'propertyTypeName', 'beds', 'baths', 'numStories', 'url', 'Lat', 'Lon'] cols_other=[i for i in results.columns if i not in cols_order ] try: missing=results.query("Missing==Missing")[['Address Input']].reset_index() missing.index=missing.index+1 missing['index']=missing['index']+1 missing.columns=['Input Position','Address Input'] results=results.query("Missing!=Missing")[cols_order+cols_other].drop(columns=['Missing']).reset_index() except: results=results[cols_order+cols_other].reset_index() results['index']=results['index']+1 results.index=results.index+1 results=results.rename(columns={'index':'Input Position'}) results['yearBuilt']=results['yearBuilt'].fillna(0).astype(int).astype(str).replace('0','') results_shape=results.shape[0] percent_results=(results_shape/total_input_shape)*100 with st.container(): st.write(f"Redfin Results: {percent_results}%") st.dataframe( results, column_config={ "url": st.column_config.LinkColumn("url"), "Image": st.column_config.LinkColumn("Image"), }, hide_index=False, ) csv = convert_df(results) st.download_button( label="Download Results as CSV", data=csv, file_name=f'Redfin Results.csv', mime='text/csv') try: csv2 = convert_df(missing) st.write(f"Missing Addresses: {100-percent_results}%") st.dataframe(missing) st.download_button( label="Download Missing Data as CSV", data=csv2, file_name=f'Redfin missing.csv', mime='text/csv') except: pass st.markdown(""" """, unsafe_allow_html=True)