deprem-ocr-migrate-ner

Running

App Files Files Community

merve HF Staff committed on Feb 8, 2023

Commit

789663f

1 Parent(s): 3416d9c

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -39

app.py CHANGED Viewed

@@ -15,42 +15,12 @@ import requests
 import json
 import os
-import openai
-class OpenAI_API:
-    def __init__(self):
-        self.openai_api_key = ''
-    def single_request(self, address_text):
-        openai.api_type = "azure"
-        openai.api_base = "https://damlaopenai.openai.azure.com/"
-        openai.api_version = "2022-12-01"
-        openai.api_key = os.getenv("API_KEY")
-        response = openai.Completion.create(
-          engine="Davinci-003",
-          prompt=address_text,
-          temperature=0.9,
-          max_tokens=256,
-          top_p=1.0,
-          n=1,
-          logprobs=0,
-          echo=False,
-          stop=None,
-          frequency_penalty=0,
-          presence_penalty=0,
-          best_of=1)
-        return response
 ########################
-openai.api_key = os.getenv('API_KEY')
 reader = Reader(["tr"])
 def get_parsed_address(input_img):
@@ -98,16 +68,75 @@ def text_dict(input):
     write_db(eval_result)
     return (
-        str(eval_result['city']),
-        str(eval_result['distinct']),
-        str(eval_result['neighbourhood']),
-        str(eval_result['street']),
-        str(eval_result['address']),
         str(eval_result['tel']),
-        str(eval_result['name_surname']),
         str(eval_result['no']),
     )
 def openai_response(ocr_input):
     prompt = f"""Tabular Data Extraction You are a highly intelligent and accurate tabular data extractor from
             plain text input and especially from emergency text that carries address information, your inputs can be text
@@ -144,6 +173,7 @@ def openai_response(ocr_input):
     return resp
 with gr.Blocks() as demo:
     gr.Markdown(
     """
@@ -173,7 +203,7 @@ with gr.Blocks() as demo:
     submit_button.click(get_parsed_address, inputs = img_area, outputs = open_api_text, api_name="upload_image")
-    ocr_result.change(openai_response, ocr_result, open_api_text, api_name="upload-text")
     open_api_text.change(text_dict, open_api_text, [city, distinct, neighbourhood, street, address, tel, name_surname, no])

 import json
 import os
 ########################
 reader = Reader(["tr"])
+API_TOKEN = os.getenv("HF_TOKEN")
 def get_parsed_address(input_img):
     write_db(eval_result)
     return (
+        str(eval_result['il']),
+        str(eval_result['ilce']),
+        str(eval_result['mahalle']),
+        str(eval_result['sokak']),
+        str(eval_result['Apartman/site']),
         str(eval_result['tel']),
+        str(eval_result['isim']) + str(eval_result['soyad']),
         str(eval_result['no']),
     )
+import json
+import requests
+# Authorization header sent by query() with each request. API_TOKEN is read
+# from the HF_TOKEN environment variable, so it is None when that variable is
+# unset and the header value then reads "Bearer None".
+headers = {"Authorization": f"Bearer {API_TOKEN}"}
+# URL that query() posts to (the deprem-ml/deprem-ner model path).
+API_URL = "https://api-inference.huggingface.co/models/deprem-ml/deprem-ner"
+def query(payload):
+    """POST ``payload`` to ``API_URL`` and return the parsed JSON reply.
+
+    Args:
+        payload: JSON-serialisable object; it is serialised with
+            ``json.dumps`` and sent as the request body.
+
+    Returns:
+        The response body decoded as UTF-8 and parsed with ``json.loads``.
+    """
+    data = json.dumps(payload)
+    # NOTE(review): no timeout is passed and the HTTP status is not checked;
+    # whatever body comes back is handed straight to json.loads below.
+    response = requests.request("POST", API_URL, headers=headers, data=data)
+    return json.loads(response.content.decode("utf-8"))
+def infer(ocr_input)
+    return query({"inputs": ocr_input})
+"""
+ORNEK NER OUTPUT
+[
+  {
+    "entity_group": "mahalle",
+    "score": 0.8160411715507507,
+    "word": "Akevler mahallesi",
+    "start": 14,
+    "end": 31
+  },
+  {
+    "entity_group": "sokak",
+    "score": 0.940501868724823,
+    "word": "Rüzgar sokak",
+    "start": 32,
+    "end": 44
+  },
+  {
+    "entity_group": "Apartman/Site",
+    "score": 0.8081040978431702,
+    "word": "Tuncay apartmanı",
+    "start": 45,
+    "end": 61
+  },
+  {
+    "entity_group": "ilce",
+    "score": 0.854024350643158,
+    "word": "Antakya",
+    "start": 72,
+    "end": 79
+  }
+]"""
+# this is not in use due to content filter
 def openai_response(ocr_input):
     prompt = f"""Tabular Data Extraction You are a highly intelligent and accurate tabular data extractor from
             plain text input and especially from emergency text that carries address information, your inputs can be text
     return resp
 with gr.Blocks() as demo:
     gr.Markdown(
     """
     submit_button.click(get_parsed_address, inputs = img_area, outputs = open_api_text, api_name="upload_image")
+    ocr_result.change(query, ocr_result, open_api_text, api_name="upload-text")
     open_api_text.change(text_dict, open_api_text, [city, distinct, neighbourhood, street, address, tel, name_surname, no])