# RedFin_FastAPI / main.py
# Uploaded by mattritchey - commit d2a9c37 ("Update main.py"), 2.92 kB
from fastapi import FastAPI
import uvicorn
import pandas as pd
from joblib import Parallel, delayed
from redfin import Redfin
import requests
# Turn off urllib3 warnings (reached through the ``requests`` package).
# NOTE(review): presumably meant to silence TLS-verification warnings from the
# Redfin client's HTTP calls - confirm.
requests.urllib3.disable_warnings()

# FastAPI application object; the ``@app.get`` decorators below register routes on it.
app = FastAPI()

# Endpoints
# Root endpoint
# GET "/" answers with a fixed payload that names this API.
@app.get("/")
def root():
    # Static response; nothing from the request is read.
    banner = {"API": "Redfin Address Scrap"}
    return banner
def red_fin_api(add):
    """Fetch Redfin details for one address as a one-row DataFrame.

    Searches Redfin for ``add``, follows the exact-match listing, and
    flattens its public-record basics, its coordinates, and the entries of
    the first amenity group in the second amenity super-group into one row.

    Args:
        add: Address value passed to ``Redfin.search``.

    Returns:
        A DataFrame whose first column is ``'Address Input'`` (the value of
        ``add``). On success it is followed by ``'url'``, the public-record
        fields, ``'Lat'``/``'Lon'`` and one column per amenity entry. If any
        lookup fails or an expected payload field is unavailable, the only
        other column is ``'Missing'`` set to 1.

    Raises:
        Whatever ``Redfin()`` or ``client.search`` raise; those two calls
        are not guarded here.
    """
    client = Redfin()
    response = client.search(add)

    try:
        # Resolve the exact-match listing, then fetch its detail payloads.
        url = response['payload']['exactMatch']['url']
        initial_info = client.initial_info(url)
        property_id = initial_info['payload']['propertyId']
        mls_data = client.below_the_fold(property_id)

        # Assumes the latLong mapping lists latitude before longitude.
        lat, lon = initial_info['payload']['latLong'].values()

        super_groups = mls_data['payload']['amenitiesInfo']['superGroups']
        amenity_entries = super_groups[1]['amenityGroups'][0]['amenityEntries']
        ext_prop = pd.DataFrame(amenity_entries)
        # Keep only the first value of each amenity entry.
        ext_prop['amenityValues'] = [
            values[0] for values in ext_prop['amenityValues'].values
        ]
        # Pivot to a single row: each referenceName becomes a column whose
        # cell holds that entry's first value.
        ext_prop2 = ext_prop[['referenceName', 'amenityValues']].T
        ext_prop2.columns = ext_prop2.values[0]
        ext_prop3 = ext_prop2.tail(1).reset_index(drop=True)

        basic_info = mls_data['payload']['publicRecordsInfo']['basicInfo']
        # errors='ignore': a record that merely lacks one of these
        # housekeeping fields should still be returned.
        df = pd.DataFrame(basic_info, index=[0]).drop(
            columns=['apn', 'propertyLastUpdatedDate', 'displayTimeZone'],
            errors='ignore',
        )
        df['Lat'] = lat
        df['Lon'] = lon

        df2 = df.join(ext_prop3)
        df2.insert(0, 'url', f'https://www.redfin.com{url}')
    except Exception:
        # Best-effort: any failed request or missing payload field marks the
        # address as missing instead of raising. ``Exception`` (not a bare
        # ``except``) lets KeyboardInterrupt/SystemExit propagate.
        df2 = pd.DataFrame({'Missing': [1]})

    df2.insert(0, 'Address Input', add)
    return df2
def catch_errors(addresses):
    """Run ``red_fin_api`` for one address without raising on ordinary errors.

    Args:
        addresses: The address value forwarded unchanged to ``red_fin_api``.

    Returns:
        The DataFrame produced by ``red_fin_api``; if that call raises, a
        one-row DataFrame holding only an ``'Address Input'`` column with
        the given value.
    """
    try:
        return red_fin_api(addresses)
    except Exception:
        # Best-effort fallback so one failing lookup still yields a row.
        # ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        return pd.DataFrame({'Address Input': [addresses]})
@app.get('/Redfin_Address_Scrap')
async def predict(address_input: str):
    """Look up several addresses on Redfin and return the combined table as JSON.

    Args:
        address_input: One or more addresses separated by ``;``.

    Returns:
        The string produced by ``DataFrame.to_json()`` for the concatenated
        ``catch_errors`` results, in input order, with a leading 1-based
        ``'Input Position'`` column and a 1-based row index.
    """
    address_input_split = address_input.split(';')

    # NOTE(review): this blocking fan-out runs inside an ``async def``
    # handler, so it presumably holds up the event loop until every lookup
    # has finished - confirm whether that is acceptable.
    frames = Parallel(n_jobs=64, prefer="threads")(
        delayed(catch_errors)(i) for i in address_input_split
    )
    results = pd.concat(frames).reset_index(drop=True)

    # reset_index(drop=True) leaves no 'index' column to rename, so the
    # 1-based position is built explicitly (assumes one row per lookup).
    results.insert(0, 'Input Position', range(1, len(results) + 1))
    results.index = results.index + 1

    # 'yearBuilt' is absent when no lookup returned it (e.g. every address
    # failed), so only normalise it when present: whole-number strings,
    # with unknown years rendered as ''.
    if 'yearBuilt' in results.columns:
        results['yearBuilt'] = (
            results['yearBuilt']
            .fillna(0)
            .astype(int)
            .astype(str)
            .replace('0', '')
        )

    return results.to_json()