File size: 2,955 Bytes
3671cba
ee259c3
3671cba
eac0454
 
b5726de
aade3ca
 
eac0454
3671cba
 
697988f
d687e0e
 
 
 
eac0454
 
 
b5726de
 
 
d687e0e
eac0454
b5726de
 
eac0454
b5726de
eac0454
b5726de
 
 
 
 
eac0454
 
 
b5726de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eac0454
b5726de
eac0454
b5726de
eac0454
b5726de
 
 
eac0454
 
 
 
b5726de
eac0454
 
 
ee259c3
eac0454
b5726de
eac0454
 
 
b5726de
 
eac0454
 
66587d0
b47ed82
 
b5726de
 
 
 
 
 
 
 
 
 
 
 
b47ed82
ee259c3
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from fastapi import FastAPI
import uvicorn

import pandas as pd
from joblib import Parallel, delayed
from redfin import Redfin
import requests
# Turn off urllib3's warnings (reached through the requests module) so they
# do not clutter the service output.
requests.urllib3.disable_warnings()

# Application object; the route decorators in this file register handlers on it.
app = FastAPI()


#Endpoints 
#Root endpoints
@app.get("/")
def root():
    """Answer requests to the root path with a fixed payload naming the API."""
    banner = {"API": "Google Address Scrap"}
    return banner


def red_fin_api(add):
    """Look up one address on Redfin and return what was found as a one-row frame.

    Args:
        add: Address text handed to ``client.search``.

    Returns:
        A ``pandas.DataFrame`` whose first column is ``'Address Input'``
        (the value of ``add``). When every lookup stage succeeds it also
        holds ``'url'``, the public-record basic-info fields (minus ``apn``,
        ``propertyLastUpdatedDate`` and ``displayTimeZone``), ``'Lat'``,
        ``'Lon'`` and one column per amenity entry. Otherwise it holds a
        single ``'Missing'`` column set to 1.

    Raises:
        Whatever ``Redfin()`` or ``client.search`` raise; those two calls are
        not guarded here.
    """
    client = Redfin()
    response = client.search(add)

    # Each stage falls back to the raw address on failure. Indexing that
    # fallback in a later stage raises, so a single failure cascades down to
    # the 'Missing' frame built at the end.
    try:
        url = response['payload']['exactMatch']['url']
        initial_info = client.initial_info(url)
    except Exception:
        initial_info = add

    try:
        property_id = initial_info['payload']['propertyId']
        mls_data = client.below_the_fold(property_id)
    except Exception:
        mls_data = add

    try:
        # Relies on latLong listing latitude before longitude.
        # NOTE(review): confirm the key order against the API payload.
        lat, lon = initial_info['payload']['latLong'].values()

        # The preload image URL used to be read here although nothing used
        # it; a listing without images was then reported as 'Missing'.

        # Second super group, first amenity group.
        # NOTE(review): presumably the exterior features - confirm.
        amenity_payload = mls_data['payload']['amenitiesInfo']
        ext_prop = amenity_payload['superGroups'][1]['amenityGroups'][0]['amenityEntries']
        ext_prop = pd.DataFrame(ext_prop)
        # Keep only the first value of each amenity entry.
        ext_prop['amenityValues'] = [
            values[0] for values in ext_prop['amenityValues'].values
        ]
        # Pivot to one row: reference names become the column labels and the
        # amenity values become the single remaining row.
        ext_prop2 = ext_prop[['referenceName', 'amenityValues']].T
        ext_prop2.columns = ext_prop2.values[0]
        ext_prop3 = ext_prop2.tail(1).reset_index(drop=True)

        basic_info = mls_data['payload']['publicRecordsInfo']['basicInfo']
        df = pd.DataFrame(basic_info, index=[0]).drop(
            columns=['apn', 'propertyLastUpdatedDate', 'displayTimeZone']
        )

        df['Lat'] = lat
        df['Lon'] = lon
        df2 = df.join(ext_prop3)
        df2.insert(0, 'url', f'https://www.redfin.com{url}')
    except Exception:
        # Any gap in the payload (or an earlier failed stage) ends up here.
        df2 = pd.DataFrame({'Missing': [1]})

    df2.insert(0, 'Address Input', add)

    return df2


    
def catch_errors(addresses):
    """Run ``red_fin_api`` for one address without letting ordinary errors escape.

    Args:
        addresses: The single address forwarded to ``red_fin_api``.

    Returns:
        The frame produced by ``red_fin_api``, or, if it raises, a one-row
        ``pandas.DataFrame`` containing only ``'Address Input'``.
    """
    try:
        return red_fin_api(addresses)
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate instead of being turned into a result row.
        return pd.DataFrame({'Address Input': [addresses]})


def process_multiple_address(addresses, n_jobs=64):
    """Look up several addresses concurrently using threads.

    Args:
        addresses: Iterable of addresses; each one is passed to
            ``catch_errors``.
        n_jobs: Worker count handed to ``joblib.Parallel``. Defaults to 64,
            the value that used to be hard-coded.

    Returns:
        The list returned by ``joblib.Parallel``: one ``catch_errors``
        result per address.
    """
    lookups = (delayed(catch_errors)(address) for address in addresses)
    return Parallel(n_jobs=n_jobs, prefer="threads")(lookups)
    
    
    
@app.get('/Redfin_Address_Scrap')
async def predict(address_input: str):
    """Look up every ';'-separated address and return the combined table as JSON.

    Args:
        address_input: One or more addresses separated by ``';'``.

    Returns:
        The JSON string produced by ``DataFrame.to_json()`` for the combined
        results. Columns start with ``'Input Position'`` (1-based position of
        the address in the input), followed by the preferred columns listed
        in ``cols_order`` and then any remaining columns.
    """
    address_input_split = address_input.split(';')
    # NOTE(review): this is a synchronous call inside an async handler;
    # confirm that blocking here for the duration of the lookups is intended.
    frames = process_multiple_address(address_input_split)
    results = pd.concat(frames).reset_index(drop=True)

    cols_order = ['Address Input', 'sqFtFinished', 'totalSqFt', 'yearBuilt',
                  'propertyTypeName', 'beds', 'baths', 'numStories',
                  'url',
                  'Lat', 'Lon']
    cols_other = [col for col in results.columns if col not in cols_order]
    # reindex instead of plain selection: when no lookup produced one of the
    # preferred columns (e.g. every address came back 'Missing'), the column
    # is created empty rather than raising KeyError.
    results = results.reindex(columns=cols_order + cols_other).reset_index()

    # Shift both the position column and the row labels to start at 1.
    results['index'] = results['index'] + 1
    results.index = results.index + 1
    results = results.rename(columns={'index': 'Input Position'})

    # Render the year as text without decimals; missing or non-numeric
    # years become an empty string.
    year_built = pd.to_numeric(results['yearBuilt'], errors='coerce')
    results['yearBuilt'] = (
        year_built.fillna(0).astype(int).astype(str).replace('0', '')
    )

    return results.to_json()