matsuap committed on
Commit
f4f7130
·
1 Parent(s): 6ca3d69

処理時間を測定するためのコンテキストマネージャを追加し、ベクトル検索および前処理の各ステップでの実行時間を計測するように修正。コードの可読性を向上させるために、不要なコメントを削除。

Browse files
Files changed (1) hide show
  1. app.py +26 -18
app.py CHANGED
@@ -9,6 +9,16 @@ import os
9
  from fastapi import FastAPI
10
  from pymilvus import MilvusClient
11
  from dotenv import load_dotenv
 
 
 
 
 
 
 
 
 
 
12
 
13
  # .envファイルを読み込む
14
  load_dotenv()
@@ -58,7 +68,8 @@ def vector_search(query_address, top_k):
58
  max_retries = 5
59
  for attempt in range(max_retries):
60
  try:
61
- query_embeds = embed_via_multilingual_e5_large([query_address])
 
62
  break # 成功した場合はループを抜ける
63
 
64
  except InferenceEndpointError as e:
@@ -75,17 +86,8 @@ def vector_search(query_address, top_k):
75
  elif e.code == InferenceEndpointErrorCode.UNKNOWN_ERROR:
76
  raise gr.Error(f"{InferenceEndpointErrorCode.UNKNOWN_ERROR}: {e.message}")
77
 
78
- '''
79
- hits = search_via_milvus(query_embeds[0], top_k, VECTOR_SEARCH_COLLECTION_NAME_V2)
80
-
81
- if hits:
82
- normalized = hits[0][-1]
83
-
84
- else:
85
  hits = search_via_milvus(query_embeds[0], top_k, VECTOR_SEARCH_COLLECTION_NAME)
86
- normalized = hits[0][-1]
87
- '''
88
- hits = search_via_milvus(query_embeds[0], top_k, VECTOR_SEARCH_COLLECTION_NAME)
89
  return hits
90
 
91
  def replace_circle(input_text):
@@ -200,8 +202,11 @@ def compare_two_addresses(address1, address2):
200
 
201
  @app.post("/normalize-address")
202
  def normalize_address(query_address):
203
- preprocessed = preprocess(query_address)
204
- return vector_search(preprocessed, top_k=1)[0][-1]
 
 
 
205
 
206
  # =========================
207
  # Gradio tabs definition
@@ -360,11 +365,14 @@ def create_vector_search_tab():
360
  search_result_df = gr.Dataframe(label="検索結果")
361
 
362
  def search_address(query_address, top_k):
363
- preprocessed = preprocess(query_address)
364
- hits = vector_search(preprocessed, top_k)
365
- normalized = hits[0][-1]
366
- search_result_df = pd.DataFrame(hits, columns=['Top-k', '類似度', '住所'])
367
- splits = split_address(normalized)
 
 
 
368
 
369
  data = {
370
  'pref': splits['pref'],
 
9
  from fastapi import FastAPI
10
  from pymilvus import MilvusClient
11
  from dotenv import load_dotenv
12
+ import time
13
+ from contextlib import contextmanager
14
+
15
@contextmanager
def measure(label="処理"):
    """Time the body of a ``with`` block and print the elapsed seconds.

    Args:
        label: Text printed in front of the timing so that different
            measured sections can be told apart in the output.

    Yields:
        None. The manager is used only for its timing side effect.

    The elapsed time is printed even when the body raises; the exception
    then propagates to the caller unchanged.
    """
    # perf_counter() is monotonic, so the measured interval cannot be
    # distorted by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    try:
        yield
    finally:
        # Report in ``finally`` so failing sections are still timed.
        elapsed = time.perf_counter() - start
        print(f"{label} 実行時間: {elapsed:.6f} 秒")
21
+
22
 
23
  # .envファイルを読み込む
24
  load_dotenv()
 
68
  max_retries = 5
69
  for attempt in range(max_retries):
70
  try:
71
+ with measure('vector_search - embed_via_multilingual_e5_large'):
72
+ query_embeds = embed_via_multilingual_e5_large([query_address])
73
  break # 成功した場合はループを抜ける
74
 
75
  except InferenceEndpointError as e:
 
86
  elif e.code == InferenceEndpointErrorCode.UNKNOWN_ERROR:
87
  raise gr.Error(f"{InferenceEndpointErrorCode.UNKNOWN_ERROR}: {e.message}")
88
 
89
+ with measure('vector_search - search_via_milvus'):
 
 
 
 
 
 
90
  hits = search_via_milvus(query_embeds[0], top_k, VECTOR_SEARCH_COLLECTION_NAME)
 
 
 
91
  return hits
92
 
93
  def replace_circle(input_text):
 
202
 
203
  @app.post("/normalize-address")
204
  def normalize_address(query_address):
205
+ with measure('preprocess'):
206
+ preprocessed = preprocess(query_address)
207
+ with measure('vector_search'):
208
+ result = vector_search(preprocessed, top_k=1)[0][-1]
209
+ return result
210
 
211
  # =========================
212
  # Gradio tabs definition
 
365
  search_result_df = gr.Dataframe(label="検索結果")
366
 
367
  def search_address(query_address, top_k):
368
+ with measure('preprocess'):
369
+ preprocessed = preprocess(query_address)
370
+ with measure('vector_search'):
371
+ hits = vector_search(preprocessed, top_k)
372
+ normalized = hits[0][-1]
373
+ search_result_df = pd.DataFrame(hits, columns=['Top-k', '類似度', '住所'])
374
+ with measure('split_address'):
375
+ splits = split_address(normalized)
376
 
377
  data = {
378
  'pref': splits['pref'],