File size: 2,915 Bytes
3671cba
ee259c3
3671cba
eac0454
 
b5726de
aade3ca
 
eac0454
3671cba
 
697988f
d687e0e
 
 
 
5033ef2
eac0454
 
b5726de
 
 
d687e0e
eac0454
b5726de
 
eac0454
b5726de
eac0454
b5726de
 
 
 
 
eac0454
 
 
b5726de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eac0454
b5726de
eac0454
b5726de
eac0454
b5726de
 
 
eac0454
 
 
 
b5726de
eac0454
 
 
ee259c3
e38d30a
eac0454
 
b5726de
 
eac0454
 
66587d0
3a598d1
e38d30a
8f7cfd1
b5726de
d2a9c37
 
 
 
 
b5726de
e38d30a
 
 
 
b5726de
6614041
ee259c3
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
from fastapi import FastAPI
import uvicorn

import pandas as pd
from joblib import Parallel, delayed
from redfin import Redfin
import requests
# Silence urllib3 warnings for the whole process.
# NOTE(review): presumably needed because the Redfin client's HTTP calls
# trigger urllib3 warnings (e.g. unverified HTTPS) -- confirm before removing.
requests.urllib3.disable_warnings()

# Application object; the route handlers in this module register on it
# through the @app.get(...) decorators.
app = FastAPI()


# Endpoints
# Root endpoint
@app.get("/")
def root():
    """Return a small payload identifying this API at the root path."""
    banner = {"API": "Redfin Address Scrap"}
    return banner


def red_fin_api(add):
    """Look up a single address on Redfin and return its details as a DataFrame.

    Args:
        add: Address string passed to the Redfin search.

    Returns:
        A one-row ``pandas.DataFrame`` whose first column is
        ``'Address Input'`` (the value of ``add``). On success the remaining
        columns are, in order: ``url``, the public-record basic info (minus
        ``apn``, ``propertyLastUpdatedDate`` and ``displayTimeZone``),
        ``Lat``, ``Lon`` and one column per amenity entry. If any step of the
        lookup or reshaping fails, the row instead carries a single
        ``'Missing'`` column set to 1.

    Raises:
        Exception: anything raised by ``Redfin()`` or ``client.search`` is
            propagated; only the steps after the search are guarded.
    """
    client = Redfin()
    response = client.search(add)

    # Every step below depends on the previous one, so a failure anywhere
    # (no exact match, missing payload key, HTTP error, unexpected shape)
    # means there is nothing usable to report for this address.
    try:
        url = response['payload']['exactMatch']['url']
        initial_info = client.initial_info(url)
        property_id = initial_info['payload']['propertyId']
        mls_data = client.below_the_fold(property_id)

        # NOTE(review): relies on the dict holding exactly two values in
        # (latitude, longitude) order -- confirm against the Redfin payload.
        lat, lon = initial_info['payload']['latLong'].values()

        # Amenity entries of the first group in the second super-group.
        amenity_entries = mls_data['payload']['amenitiesInfo']['superGroups'][1]['amenityGroups'][0]['amenityEntries']
        amenities = pd.DataFrame(amenity_entries)
        # Each entry carries a list of values; keep only the first one.
        amenities['amenityValues'] = [vals[0] for vals in amenities['amenityValues'].values]

        # Pivot to a single row: column names come from 'referenceName',
        # the cell values from 'amenityValues'.
        amenities_wide = amenities[['referenceName', 'amenityValues']].T
        amenities_wide.columns = amenities_wide.values[0]
        amenities_row = amenities_wide.tail(1).reset_index(drop=True)

        df = pd.DataFrame(
            mls_data['payload']['publicRecordsInfo']['basicInfo'], index=[0]
        ).drop(columns=['apn', 'propertyLastUpdatedDate', 'displayTimeZone'])
        df['Lat'] = lat
        df['Lon'] = lon

        df2 = df.join(amenities_row)
        df2.insert(0, 'url', f'https://www.redfin.com{url}')
    except Exception:
        # Best-effort lookup: flag the address as missing instead of failing.
        df2 = pd.DataFrame({'Missing': [1]})

    df2.insert(0, 'Address Input', add)
    return df2


    
def catch_errors(addresses):
    """Run ``red_fin_api`` for one address without letting errors propagate.

    Args:
        addresses: A single address string (despite the plural name).

    Returns:
        The DataFrame produced by ``red_fin_api``; if that call raises, a
        one-row DataFrame containing only the ``'Address Input'`` column.
    """
    try:
        return red_fin_api(addresses)
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        return pd.DataFrame({'Address Input': [addresses]})



    
    
    
@app.get('/Redfin_Address_Scrap')
async def predict(address_input: str):
    """Look up one or more addresses on Redfin and return the combined table.

    Args:
        address_input: One or more addresses separated by ``;``.

    Returns:
        The combined results serialised with ``DataFrame.to_json()``. Each
        row has an ``'Input Position'`` column holding the 1-based position
        of its address in ``address_input``; the row index is 1-based too.
    """
    addresses = address_input.split(';')

    # Lookups run in threads; never start more workers than there are
    # addresses (str.split always yields at least one element).
    # NOTE(review): this blocks inside an async handler -- consider a plain
    # ``def`` handler or a thread-pool offload if the event loop must stay free.
    frames = Parallel(n_jobs=min(64, len(addresses)), prefer="threads")(
        delayed(catch_errors)(address) for address in addresses
    )

    # Tag each per-address frame with its 1-based input position before
    # concatenating, so the position survives the index reset.
    for position, frame in enumerate(frames, start=1):
        frame.insert(0, 'Input Position', position)

    results = pd.concat(frames, ignore_index=True)
    results.index = results.index + 1

    # 'yearBuilt' is absent when every lookup failed; when present, render
    # it as an integer string and leave missing years as an empty string.
    if 'yearBuilt' in results.columns:
        results['yearBuilt'] = (
            results['yearBuilt']
            .fillna(0)
            .astype(int)
            .astype(str)
            .replace('0', '')
        )

    return results.to_json()