Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import pipeline | |
| import openpyxl | |
| import tempfile | |
| import pycountry | |
# Zero-shot text classifier used by the scoring helpers in this file
# (compaire_two, get_ai_position, compare_by_mid). It is created once here,
# at module level, so every helper reuses the same pipeline object.
classifier = pipeline("zero-shot-classification",
                      model="LogicSpine/address-large-text-classifier")
# Historical / colloquial country names that are matched in addition to the
# official names reported by pycountry.
_FORMER_COUNTRY_NAMES = (
    "burma",
    "ceylon",
    "persia",
    "zaire",
    "upper volta",
    "swaziland",
    "macedonia",
    "czech republic",
    "turkey",
    "holland",
    "kampuchea",
    "dahomey",
    "bechuanaland",
    "gold coast",
    "nyasaland",
    "korea",
)

# Built once instead of on every call: check_for_third runs several times per
# spreadsheet row, and a frozenset gives constant-time membership tests.
_KNOWN_COUNTRY_NAMES = frozenset(
    country.name.lower() for country in pycountry.countries
).union(_FORMER_COUNTRY_NAMES)


def check_for_third(address: str) -> bool:
    """Tell whether any comma-separated part of *address* is a country name.

    Each part is lower-cased and stripped, then compared for exact equality
    against the pycountry country names plus a fixed list of older names.

    Args:
        address (str): Address text, optionally made of comma-separated parts.

    Returns:
        bool: True if at least one part equals a known country name.
    """
    # str.split(",") yields the whole string as a single part when there is
    # no comma, so no separate "no comma" branch is needed.
    return any(
        part.lower().strip() in _KNOWN_COUNTRY_NAMES
        for part in address.split(",")
    )
def check_for_first(address: str) -> bool:
    """Tell whether *address* mentions a school, laboratory or department.

    The match is a case-insensitive substring search.

    Args:
        address (str): Address text to inspect.

    Returns:
        bool: True if any of the keywords occurs in the address.
    """
    lowered = address.lower()
    return any(word in lowered for word in ("school", "laboratory", "department"))
def check_for_second(address: str) -> bool:
    """Tell whether *address* mentions a university.

    The match is a case-insensitive substring search for "university".

    Args:
        address (str): Address text to inspect.

    Returns:
        bool: True if the keyword occurs in the address.
    """
    return "university" in address.lower()
def compaire_two(bigger: str, smaller: str, mid: int) -> bool:
    """Decide whether *bigger* fits the slot *mid* better than *smaller*.

    A keyword match on *bigger* wins immediately. Otherwise both texts are
    scored by the zero-shot classifier against the labels of the slot and the
    summed scores are compared.

    Args:
        bigger (str): Candidate address.
        smaller (str): Address to compare the candidate against.
        mid (int): Slot to test for: 1 (school / department / laboratory),
            2 (university / polytechnic) or 3 (state / district / country).

    Raises:
        ValueError: If *mid* is not 1, 2 or 3.

    Returns:
        bool: True if *bigger* has the higher priority for the slot.
    """
    rules = {
        1: (check_for_first, ["School", "Department", "Laboratory"]),
        2: (check_for_second, ["University", "Polytechnic"]),
        3: (check_for_third, ["State", "District", "Country"]),
    }
    try:
        keyword_check, labels = rules[mid]
    except (KeyError, TypeError):
        raise ValueError(f"Invalid value passed in mid : {mid}") from None

    if keyword_check(bigger):
        return True

    # multi_label=True scores every label independently. With the pipeline
    # default (multi_label=False) the scores are softmax-normalised and always
    # add up to 1, which would make the two sums below indistinguishable.
    bigger_total = sum(classifier(bigger, labels, multi_label=True)["scores"])
    smaller_total = sum(classifier(smaller, labels, multi_label=True)["scores"])
    return bigger_total > smaller_total
def get_ai_position(address: str) -> int:
    """Pick the slot (1, 2 or 3) that *address* most likely belongs to.

    Keyword rules are tried first, in slot order. Only when none of them
    matches is the zero-shot classifier consulted.

    Args:
        address (str): Address text to classify.

    Returns:
        int: 1 for school / department / laboratory, 2 for university /
        polytechnic, 3 for state / district / country.
    """
    if check_for_first(address):
        return 1
    if check_for_second(address):
        return 2
    if check_for_third(address):
        return 3

    label_groups = (
        (1, ["School", "Department", "Laboratory"]),
        (2, ["University", "Polytechnic"]),
        (3, ["State", "District", "Country"]),
    )

    def group_score(group):
        # multi_label=True scores each label independently; with the default
        # softmax normalisation every group's scores sum to 1, so the groups
        # could not be told apart. The best single label is used (not the sum)
        # because the groups contain different numbers of labels.
        return max(classifier(address, group[1], multi_label=True)["scores"])

    # max() returns the first maximal item, so ties go to the lower slot.
    best_position, _ = max(label_groups, key=group_score)
    return best_position
def compare_by_mid(bigger: int, smaller: int, address: str, threshold: float = 0.1) -> bool:
    """Decide whether *address* belongs to slot *bigger* rather than *smaller*.

    A keyword match for the *bigger* slot wins immediately. Otherwise the
    address is scored by the zero-shot classifier against the labels of both
    slots and the best label scores are compared.

    Args:
        bigger (int): Preferred slot: 1, 2, or anything else for the third slot.
        smaller (int): Alternative slot: 1, 2, or anything else for the third slot.
        address (str): Address text to evaluate.
        threshold (float): Margin by which the *bigger* score must exceed the
            *smaller* score for the result to be True.

    Returns:
        bool: True if the address is more likely to belong to *bigger*.
    """
    first_labels = ["School", "Department", "Laboratory"]
    second_labels = ["University", "Polytechnic"]
    third_labels = ["State", "District", "Country"]

    def labels_for(mid):
        if mid == 1:
            return first_labels
        if mid == 2:
            return second_labels
        return third_labels

    # The keyword checks must inspect the address text, not the slot number.
    if bigger == 1:
        keyword_hit = check_for_first(address)
    elif bigger == 2:
        keyword_hit = check_for_second(address)
    else:
        keyword_hit = check_for_third(address)
    if keyword_hit:
        return True

    # multi_label=True keeps the scores of the two differently sized label
    # sets comparable; the default softmax normalisation would not.
    bigger_scores = classifier(address, labels_for(bigger), multi_label=True)["scores"]
    smaller_scores = classifier(address, labels_for(smaller), multi_label=True)["scores"]

    # True only when the preferred slot leads by more than the threshold.
    return max(bigger_scores) - max(smaller_scores) > threshold
def find_missing_data(data1: str, data2: str, data3: str, var1: str, var2: str, var3: str) -> str:
    """Report which of the three input values were not assigned to a variable.

    Args:
        data1 (str): First input value (address).
        data2 (str): Second input value.
        data3 (str): Third input value.
        var1 (str): First assigned value, or None if unassigned.
        var2 (str): Second assigned value, or None if unassigned.
        var3 (str): Third assigned value, or None if unassigned.

    Returns:
        str: The unassigned input values joined with ", " in input order, or
        "All data has been assigned correctly." when nothing is missing.
    """
    assigned = {var1, var2, var3}
    missing = []
    # Walk the inputs in order (rather than subtracting sets) so the joined
    # result is deterministic when more than one value is missing.
    for item in (data1, data2, data3):
        if item not in assigned and item not in missing:
            missing.append(item)
    if missing:
        return ', '.join(missing)
    return "All data has been assigned correctly."
def swapper(i1, i2, i3):
    """Arrange three addresses into (first, second, third) using keyword rules.

    Country matches go to the third slot (a later match replaces an earlier
    one), school / department / laboratory matches become first-slot
    candidates, and university matches go to the second slot. Slots that are
    still empty afterwards are filled, in slot order, with the inputs that
    have not been placed yet, in input order.

    Args:
        i1: First address.
        i2: Second address.
        i3: Third address.

    Returns:
        tuple: The three inputs reordered as (first, second, third).
    """
    inputs = [i1, i2, i3]
    first_candidates = []
    second = None
    third = None

    for data in inputs:
        if check_for_third(data):
            if third is None:
                third = data
                continue
            # A later country match takes over the slot; the displaced value
            # is then examined below like any other address.
            third, data = data, third
        if check_for_first(data):
            first_candidates.append(data)
        elif check_for_second(data):
            # The most recent university match keeps the slot; an earlier one
            # simply becomes an unplaced leftover.
            second = data

    first = first_candidates[0] if first_candidates else None
    if second is None and len(first_candidates) > 1:
        second = first_candidates[1]

    # Track leftovers by occurrence instead of by value so that repeated
    # addresses in the input cannot exhaust the pool and raise, and so the
    # result is always a rearrangement of the three inputs.
    leftovers = list(inputs)
    for placed in (first, second, third):
        if placed is not None:
            leftovers.remove(placed)

    if first is None:
        first = leftovers.pop(0)
    if second is None:
        second = leftovers.pop(0)
    if third is None:
        third = leftovers.pop(0)
    return first, second, third
# How to resolve one address that matched two slots at once:
# (slot_a, slot_b, mid passed to compaire_two,
#  slot given to the spare address when compaire_two is True,
#  slot given to the spare address otherwise).
# NOTE(review): in the first rule a spare address that looks more like slot 1
# is placed in slot 2, unlike the other two rules - kept as in the original
# logic, confirm whether that is intended.
_DUPLICATE_RULES = (
    (1, 2, 1, 2, 1),
    (1, 3, 1, 1, 3),
    (2, 3, 3, 3, 2),
)


def _first_match(predicate, candidates):
    """Return the first candidate accepted by *predicate*, or None."""
    return next((item for item in candidates if predicate(item)), None)


def _unassigned_addresses(addresses, assigned):
    """Return the addresses absent from *assigned*, in input order, without repeats."""
    spare = []
    for item in addresses:
        if item not in assigned and item not in spare:
            spare.append(item)
    return spare


def _fill_from_ai(slots, position, candidate, addresses):
    """Anchor *candidate* at its AI-chosen slot and fill the two other slots.

    Slot 2 or 3 is used when *position* says so, otherwise slot 1. If the
    anchor slot is empty it receives *candidate*. When either remaining slot
    is still None, both are (re)assigned from the two addresses that differ
    from the anchor value, with compare_by_mid deciding their order.
    """
    anchor = position if position in (2, 3) else 1
    low, high = [number for number in (1, 2, 3) if number != anchor]

    if not slots[anchor]:
        slots[anchor] = candidate
    if slots[low] is not None and slots[high] is not None:
        return

    first, second, third = addresses
    if slots[anchor] == first:
        probe, other = second, third
    elif slots[anchor] == second:
        probe, other = first, third
    elif slots[anchor] == third:
        probe, other = first, second
    else:
        # The anchor holds a value that is not one of the inputs.
        return

    if compare_by_mid(low, high, probe):
        slots[low], slots[high] = probe, other
    else:
        slots[low], slots[high] = other, probe


def settle_all_address(address_first: str, address_second: str, address_third: str):
    """Assign three addresses to the first / second / third slots.

    Steps:
      1. Keyword rules pick, per slot, the first address that matches it.
      2. If one address was picked for two slots, an unassigned address takes
         one of them, as decided by compaire_two. If the same address was
         picked for all three slots, every slot is cleared.
      3. If any slot is still empty, get_ai_position and compare_by_mid are
         used to fill the gaps.
      4. The result is passed through swapper for the final ordering.

    Args:
        address_first (str): First address of the row.
        address_second (str): Second address of the row.
        address_third (str): Third address of the row.

    Returns:
        The (first, second, third) tuple returned by swapper.
    """
    addresses = (address_first, address_second, address_third)
    slots = {
        1: _first_match(check_for_first, addresses),
        2: _first_match(check_for_second, addresses),
        3: _first_match(check_for_third, addresses),
    }

    if slots[1] == slots[2] == slots[3]:
        # Either nothing matched, or one address matched everything: start
        # from a clean slate and let the classifier decide.
        slots = {1: None, 2: None, 3: None}
    else:
        for slot_a, slot_b, mid, when_true, when_false in _DUPLICATE_RULES:
            shared = slots[slot_a]
            if shared is None or shared != slots[slot_b]:
                continue
            spare = _unassigned_addresses(addresses, tuple(slots.values()))
            # Only a real input address may replace a duplicate. Nothing is
            # substituted when no address is left over, and when two are left
            # over the first one is used instead of a joined string.
            if spare:
                if compaire_two(spare[0], shared, mid):
                    slots[when_true] = spare[0]
                else:
                    slots[when_false] = spare[0]
            break  # only the first duplicated pair is resolved

    if any(value is None for value in slots.values()):
        positions = [get_ai_position(item) for item in addresses]
        for position, candidate in zip(positions, addresses):
            _fill_from_ai(slots, position, candidate, addresses)

    return swapper(slots[1], slots[2], slots[3])
def process_file(filepath: str):
    """Reorder the address columns of an Excel sheet and save a new workbook.

    The active sheet of *filepath* is copied into a new workbook. For every
    data row whose columns 4, 5 and 6 are all filled, the three values are
    reordered with settle_all_address. Rows with a missing address are copied
    unchanged. Processing stops once more than three consecutive rows lack a
    complete set of addresses.

    Args:
        filepath (str): Path of the workbook to read.

    Returns:
        str: Path of the temporary .xlsx file holding the result.
    """
    address_columns = (4, 5, 6)
    max_incomplete_run = 3

    wb = openpyxl.load_workbook(filepath, data_only=True)
    ws = wb.active
    new_wb = openpyxl.Workbook()
    new_ws = new_wb.active

    # Only the path is needed; close the handle right away so the descriptor
    # is not leaked and the workbook can be saved over the file later.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as temp_file:
        temp_path = temp_file.name

    column_numbers = range(1, ws.max_column + 1)

    # Header row is copied as-is.
    for col in column_numbers:
        new_ws.cell(row=1, column=col).value = ws.cell(row=1, column=col).value

    incomplete_run = 0
    for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
        if incomplete_run > max_incomplete_run:
            break
        row_num = row[0].row

        # Copy the whole row first so rows that cannot be processed keep
        # their original address cells instead of being blanked.
        for col in column_numbers:
            new_ws.cell(row=row_num, column=col).value = ws.cell(row=row_num, column=col).value

        addresses = [ws.cell(row=row_num, column=col).value for col in address_columns]
        if any(value is None for value in addresses):
            incomplete_run += 1
            continue

        # Count consecutive incomplete rows only, so a few scattered gaps do
        # not cut processing short.
        incomplete_run = 0
        ad1, ad2, ad3 = settle_all_address(*addresses)
        for col, value in zip(address_columns, (ad1, ad2, ad3)):
            new_ws.cell(row=row_num, column=col).value = value
        print(f"Adding : {ad1} | {ad2} | {ad3}")

    new_wb.save(temp_path)
    return temp_path
def gradio_process(file):
    """Gradio callback: process the uploaded workbook and return the result path.

    Args:
        file: Uploaded file object; its ``name`` attribute is used as the path
            of the workbook to process.

    Returns:
        The path returned by process_file.
    """
    return process_file(file.name)
# Gradio UI: a single file input and a single file output, both routed
# through gradio_process.
iface = gr.Interface(
    fn=gradio_process,
    inputs=gr.File(),
    outputs=gr.File(),
    title="AI Address Processor",
    description="Upload an Excel file, and the AI will process the addresses."
)
# The interface is launched as soon as this module is executed (there is no
# __main__ guard); debug=True is passed straight to Gradio's launch().
iface.launch(debug=True)