min24ss commited on
Commit
f5e3afe
·
verified ·
1 Parent(s): a7ada72

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -3
app.py CHANGED
@@ -13,12 +13,13 @@ import torch
13
  # ====== ZIP 자동 해제 ======
14
  zip_path = "solo_leveling_faiss_ko.zip"
15
  extract_dir = "solo_leveling_faiss_ko"
 
16
  if os.path.exists(zip_path) and not os.path.exists(extract_dir):
17
  with zipfile.ZipFile(zip_path, 'r') as zip_ref:
18
  zip_ref.extractall(extract_dir)
19
  print(f"[INFO] 압축 해제 완료 → {extract_dir}")
20
 
21
- # ====== 데이터 로드 ======
22
  df = pd.read_csv("sl_webtoon_full_data_sequential.tsv", sep="\t")
23
  df['row_id'] = df.index
24
  df['text'] = df.apply(
@@ -27,9 +28,26 @@ df['text'] = df.apply(
27
  )
28
  texts = df['text'].tolist()
29
 
30
- # ====== FAISS 로드 ======
31
  embedding_model = HuggingFaceEmbeddings(model_name='jhgan/ko-sroberta-multitask')
32
- vectorstore = FAISS.load_local(extract_dir, embedding_model, allow_dangerous_deserialization=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  # ====== ๋ชจ๋ธ ๋กœ๋“œ (CPU ์ „์šฉ) ======
35
  model_name = "kakaocorp/kanana-nano-2.1b-instruct"
@@ -43,6 +61,7 @@ custom_prompt = PromptTemplate(
43
  input_variables=["context", "question"],
44
  template="๋‹ค์Œ ๋ฌธ๋งฅ์„ ์ฐธ๊ณ ํ•˜์—ฌ ์งˆ๋ฌธ์— ๋‹ตํ•˜์„ธ์š”.\n\n๋ฌธ๋งฅ:\n{context}\n\n์งˆ๋ฌธ:\n{question}\n\n๋‹ต๋ณ€:"
45
  )
 
46
  qa_chain = RetrievalQA.from_chain_type(
47
  llm=llm,
48
  retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),
 
13
  # ====== ZIP 자동 해제 ======
14
  zip_path = "solo_leveling_faiss_ko.zip"
15
  extract_dir = "solo_leveling_faiss_ko"
16
+
17
  if os.path.exists(zip_path) and not os.path.exists(extract_dir):
18
  with zipfile.ZipFile(zip_path, 'r') as zip_ref:
19
  zip_ref.extractall(extract_dir)
20
  print(f"[INFO] 압축 해제 완료 → {extract_dir}")
21
 
22
+ # ====== TSV 데이터 로드 ======
23
  df = pd.read_csv("sl_webtoon_full_data_sequential.tsv", sep="\t")
24
  df['row_id'] = df.index
25
  df['text'] = df.apply(
 
28
  )
29
  texts = df['text'].tolist()
30
 
31
+ # ====== FAISS 안전 로드 ======
32
  embedding_model = HuggingFaceEmbeddings(model_name='jhgan/ko-sroberta-multitask')
33
+
34
+ possible_paths = [
35
+ extract_dir,
36
+ os.path.join(extract_dir, "solo_leveling_faiss_ko"),
37
+ os.path.join(extract_dir, "faiss_index")
38
+ ]
39
+
40
+ load_path = None
41
+ for path in possible_paths:
42
+ if os.path.exists(os.path.join(path, "index.faiss")):
43
+ load_path = path
44
+ break
45
+
46
+ if load_path:
47
+ vectorstore = FAISS.load_local(load_path, embedding_model, allow_dangerous_deserialization=True)
48
+ print(f"[INFO] FAISS 인덱스 로드 완료 → {load_path}")
49
+ else:
50
+ raise FileNotFoundError("FAISS index.faiss 파일을 찾을 수 없습니다. 압축 구조를 확인하세요.")
51
 
52
  # ====== ๋ชจ๋ธ ๋กœ๋“œ (CPU ์ „์šฉ) ======
53
  model_name = "kakaocorp/kanana-nano-2.1b-instruct"
 
61
  input_variables=["context", "question"],
62
  template="๋‹ค์Œ ๋ฌธ๋งฅ์„ ์ฐธ๊ณ ํ•˜์—ฌ ์งˆ๋ฌธ์— ๋‹ตํ•˜์„ธ์š”.\n\n๋ฌธ๋งฅ:\n{context}\n\n์งˆ๋ฌธ:\n{question}\n\n๋‹ต๋ณ€:"
63
  )
64
+
65
  qa_chain = RetrievalQA.from_chain_type(
66
  llm=llm,
67
  retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),