RedFin_FastAPI / main.py
mattritchey's picture
Update main.py
66587d0
raw
history blame
2.96 kB
from fastapi import FastAPI
import uvicorn
import pandas as pd
from joblib import Parallel, delayed
from redfin import Redfin
import requests
# Turn off urllib3 warning output, using the urllib3 module exposed by `requests`.
# NOTE(review): presumably meant to hide InsecureRequestWarning noise produced
# by the Redfin client's HTTP calls — confirm.
requests.urllib3.disable_warnings()
# Application object; the `@app.get(...)` decorators in this file register
# their handlers on it.
app = FastAPI()
# Endpoints
# Root endpoint
@app.get("/")
def root():
    """Return a small static payload identifying the service."""
    banner = {"API": "Google Address Scrap"}
    return banner
def red_fin_api(add):
    """Look up one address on Redfin and flatten the result into a single row.

    Args:
        add: Address string handed to the Redfin search.

    Returns:
        A one-row ``pandas.DataFrame`` whose first column is
        ``'Address Input'`` (the address exactly as given).

        On success the row also contains ``url`` (the listing URL), the
        public-record basic info (minus ``apn``, ``propertyLastUpdatedDate``
        and ``displayTimeZone``), ``Lat``/``Lon`` and one column per amenity
        entry taken from ``superGroups[1]['amenityGroups'][0]``.

        If any lookup or parsing step fails, the row instead carries a single
        ``'Missing'`` column set to 1.

    Raises:
        Errors from ``Redfin()`` or ``client.search`` are not handled here;
        those two calls sit outside the guarded section.
    """
    client = Redfin()
    response = client.search(add)

    # Best-effort scrape: every step depends on the previous one, so a single
    # guarded section is enough. Any failure (no exact match, unexpected
    # payload shape, column clash, ...) collapses to the 'Missing' row.
    # `Exception` rather than a bare `except` so KeyboardInterrupt/SystemExit
    # still propagate.
    try:
        url = response['payload']['exactMatch']['url']
        initial_info = client.initial_info(url)
        property_id = initial_info['payload']['propertyId']
        mls_data = client.below_the_fold(property_id)

        # NOTE(review): relies on `latLong` holding exactly two values in
        # (latitude, longitude) order — confirm against the Redfin payload.
        lat, lon = initial_info['payload']['latLong'].values()

        # NOTE(review): superGroups[1] is presumably the exterior group
        # (the original variable was named `ext_prop`) — confirm.
        entries = mls_data['payload']['amenitiesInfo']['superGroups'][1]['amenityGroups'][0]['amenityEntries']
        amenities = pd.DataFrame(entries)
        # Keep only the first value of each amenity entry.
        amenities['amenityValues'] = [
            values[0] for values in amenities['amenityValues'].values
        ]
        # Pivot: each referenceName becomes a column, its value the only row.
        amenity_row = amenities[['referenceName', 'amenityValues']].T
        amenity_row.columns = amenity_row.values[0]
        amenity_row = amenity_row.tail(1).reset_index(drop=True)

        basic_info = pd.DataFrame(
            mls_data['payload']['publicRecordsInfo']['basicInfo'],
            index=[0],
        )
        # errors='ignore': a record that lacks one of these housekeeping
        # fields should not be thrown away as 'Missing'.
        basic_info = basic_info.drop(
            columns=['apn', 'propertyLastUpdatedDate', 'displayTimeZone'],
            errors='ignore',
        )
        basic_info['Lat'] = lat
        basic_info['Lon'] = lon

        df2 = basic_info.join(amenity_row)
        df2.insert(0, 'url', f'https://www.redfin.com{url}')
    except Exception:
        df2 = pd.DataFrame({'Missing': [1]})

    df2.insert(0, 'Address Input', add)
    return df2
def catch_errors(addresses):
    """Run ``red_fin_api`` for one address, never letting a failure escape.

    Args:
        addresses: A single address string (despite the plural name).

    Returns:
        The DataFrame produced by ``red_fin_api``. If that call raises, a
        one-row DataFrame containing only the ``'Address Input'`` column, so
        the caller still gets one row for this address.
    """
    try:
        return red_fin_api(addresses)
    # `Exception` rather than a bare `except`, so KeyboardInterrupt and
    # SystemExit are not swallowed by worker threads.
    except Exception:
        return pd.DataFrame({'Address Input': [addresses]})
def process_multiple_address(addresses):
    """Apply ``catch_errors`` to every address using a joblib thread pool.

    Args:
        addresses: Iterable of address strings.

    Returns:
        Whatever ``joblib.Parallel`` returns for the submitted tasks: the
        results of ``catch_errors``, one per address.
    """
    worker_pool = Parallel(n_jobs=64, prefer="threads")
    tasks = (delayed(catch_errors)(address) for address in addresses)
    return worker_pool(tasks)
@app.get('/Redfin_Address_Scrap')
async def predict(address_input: str):
    """Scrape Redfin for a ';'-separated list of addresses and return JSON.

    Args:
        address_input: One or more addresses joined with ``';'``.

    Returns:
        The combined results serialised with ``DataFrame.to_json()``. There
        is one row per input address, indexed from 1. An
        ``'Input Position'`` column comes first, then the preferred columns
        in a fixed order, then any remaining columns.
    """
    address_input_split = address_input.split(';')

    # NOTE(review): this is a blocking call inside an `async` handler, so the
    # event loop is held for the duration of the scrape. Consider declaring
    # the handler with plain `def` so FastAPI runs it in its threadpool.
    results = process_multiple_address(address_input_split)
    results = pd.concat(results).reset_index(drop=True)

    cols_order = [
        'Address Input', 'sqFtFinished', 'totalSqFt', 'yearBuilt',
        'propertyTypeName', 'beds', 'baths', 'numStories',
        'url',
        'Lat', 'Lon',
    ]
    cols_other = [col for col in results.columns if col not in cols_order]
    # reindex rather than plain column selection: when every lookup failed,
    # the preferred columns do not exist and `results[...]` would raise
    # KeyError. reindex creates them filled with NaN instead.
    results = results.reindex(columns=cols_order + cols_other).reset_index()

    # Expose 1-based positions both as a column and as the frame index.
    results['index'] = results['index'] + 1
    results.index = results.index + 1
    results = results.rename(columns={'index': 'Input Position'})

    # Show yearBuilt as a whole-number string, blank when unknown.
    # (The original assigned to and returned an undefined name `prediction`,
    # which raised NameError on every request.)
    results['yearBuilt'] = (
        results['yearBuilt'].fillna(0).astype(int).astype(str).replace('0', '')
    )
    return results.to_json()