Spaces:
Sleeping
Sleeping
処理時間を測定するためのコンテキストマネージャを追加し、ベクトル検索および前処理の各ステップでの実行時間を計測するように修正。コードの可読性を向上させるために、不要なコメントを削除。
Browse files
app.py
CHANGED
@@ -9,6 +9,16 @@ import os
|
|
9 |
from fastapi import FastAPI
|
10 |
from pymilvus import MilvusClient
|
11 |
from dotenv import load_dotenv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
# .envファイルを読み込む
|
14 |
load_dotenv()
|
@@ -58,7 +68,8 @@ def vector_search(query_address, top_k):
|
|
58 |
max_retries = 5
|
59 |
for attempt in range(max_retries):
|
60 |
try:
|
61 |
-
|
|
|
62 |
break # 成功した場合はループを抜ける
|
63 |
|
64 |
except InferenceEndpointError as e:
|
@@ -75,17 +86,8 @@ def vector_search(query_address, top_k):
|
|
75 |
elif e.code == InferenceEndpointErrorCode.UNKNOWN_ERROR:
|
76 |
raise gr.Error(f"{InferenceEndpointErrorCode.UNKNOWN_ERROR}: {e.message}")
|
77 |
|
78 |
-
''
|
79 |
-
hits = search_via_milvus(query_embeds[0], top_k, VECTOR_SEARCH_COLLECTION_NAME_V2)
|
80 |
-
|
81 |
-
if hits:
|
82 |
-
normalized = hits[0][-1]
|
83 |
-
|
84 |
-
else:
|
85 |
hits = search_via_milvus(query_embeds[0], top_k, VECTOR_SEARCH_COLLECTION_NAME)
|
86 |
-
normalized = hits[0][-1]
|
87 |
-
'''
|
88 |
-
hits = search_via_milvus(query_embeds[0], top_k, VECTOR_SEARCH_COLLECTION_NAME)
|
89 |
return hits
|
90 |
|
91 |
def replace_circle(input_text):
|
@@ -200,8 +202,11 @@ def compare_two_addresses(address1, address2):
|
|
200 |
|
201 |
@app.post("/normalize-address")
|
202 |
def normalize_address(query_address):
|
203 |
-
|
204 |
-
|
|
|
|
|
|
|
205 |
|
206 |
# =========================
|
207 |
# Gradio tabs definition
|
@@ -360,11 +365,14 @@ def create_vector_search_tab():
|
|
360 |
search_result_df = gr.Dataframe(label="検索結果")
|
361 |
|
362 |
def search_address(query_address, top_k):
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
|
|
|
|
|
|
368 |
|
369 |
data = {
|
370 |
'pref': splits['pref'],
|
|
|
9 |
from fastapi import FastAPI
|
10 |
from pymilvus import MilvusClient
|
11 |
from dotenv import load_dotenv
|
12 |
+
import time
|
13 |
+
from contextlib import contextmanager
|
14 |
+
|
15 |
+
@contextmanager
def measure(label="処理"):
    """Time the body of a ``with`` block and print the elapsed seconds.

    Args:
        label: Text printed in front of the timing so that separate
            measurements can be told apart in the output.

    Yields:
        None. The manager only brackets the timed block.

    The timing line is printed whether the block finishes normally or
    raises; an exception raised inside the block propagates unchanged.
    """
    # perf_counter() is monotonic and high-resolution, so system clock
    # adjustments cannot distort (or make negative) the measured interval.
    start = time.perf_counter()
    try:
        yield
    finally:
        # Report in `finally` so failed attempts are timed as well.
        elapsed = time.perf_counter() - start
        print(f"{label} 実行時間: {elapsed:.6f} 秒")
|
21 |
+
|
22 |
|
23 |
# .envファイルを読み込む
|
24 |
load_dotenv()
|
|
|
68 |
max_retries = 5
|
69 |
for attempt in range(max_retries):
|
70 |
try:
|
71 |
+
with measure('vector_search - embed_via_multilingual_e5_large'):
|
72 |
+
query_embeds = embed_via_multilingual_e5_large([query_address])
|
73 |
break # 成功した場合はループを抜ける
|
74 |
|
75 |
except InferenceEndpointError as e:
|
|
|
86 |
elif e.code == InferenceEndpointErrorCode.UNKNOWN_ERROR:
|
87 |
raise gr.Error(f"{InferenceEndpointErrorCode.UNKNOWN_ERROR}: {e.message}")
|
88 |
|
89 |
+
with measure('vector_search - search_via_milvus'):
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
hits = search_via_milvus(query_embeds[0], top_k, VECTOR_SEARCH_COLLECTION_NAME)
|
|
|
|
|
|
|
91 |
return hits
|
92 |
|
93 |
def replace_circle(input_text):
|
|
|
202 |
|
203 |
@app.post("/normalize-address")
def normalize_address(query_address):
    """Handle POST /normalize-address for a single address query.

    The query is passed through ``preprocess``, then ``vector_search`` is
    called with ``top_k=1``. The last field of the first hit is returned
    (presumably the matched address text -- confirm against
    ``search_via_milvus``). Both stages are timed with ``measure``.

    Args:
        query_address: The raw address value supplied by the caller.

    Returns:
        The last element of the first search hit.
    """
    with measure('preprocess'):
        cleaned_query = preprocess(query_address)

    with measure('vector_search'):
        hits = vector_search(cleaned_query, top_k=1)
        top_hit = hits[0]
        result = top_hit[-1]

    return result
|
210 |
|
211 |
# =========================
|
212 |
# Gradio tabs definition
|
|
|
365 |
search_result_df = gr.Dataframe(label="検索結果")
|
366 |
|
367 |
def search_address(query_address, top_k):
|
368 |
+
with measure('preprocess'):
|
369 |
+
preprocessed = preprocess(query_address)
|
370 |
+
with measure('vector_search'):
|
371 |
+
hits = vector_search(preprocessed, top_k)
|
372 |
+
normalized = hits[0][-1]
|
373 |
+
search_result_df = pd.DataFrame(hits, columns=['Top-k', '類似度', '住所'])
|
374 |
+
with measure('split_address'):
|
375 |
+
splits = split_address(normalized)
|
376 |
|
377 |
data = {
|
378 |
'pref': splits['pref'],
|