Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -13,12 +13,13 @@ import torch
|
|
13 |
# ====== ZIP 자동 해제 ======
|
14 |
zip_path = "solo_leveling_faiss_ko.zip"
|
15 |
extract_dir = "solo_leveling_faiss_ko"
|
|
|
16 |
if os.path.exists(zip_path) and not os.path.exists(extract_dir):
|
17 |
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
|
18 |
zip_ref.extractall(extract_dir)
|
19 |
print(f"[INFO] ์์ถ ํด์ ์๋ฃ โ {extract_dir}")
|
20 |
|
21 |
-
# ====== 데이터 로드 ======
|
22 |
df = pd.read_csv("sl_webtoon_full_data_sequential.tsv", sep="\t")
|
23 |
df['row_id'] = df.index
|
24 |
df['text'] = df.apply(
|
@@ -27,9 +28,26 @@ df['text'] = df.apply(
|
|
27 |
)
|
28 |
texts = df['text'].tolist()
|
29 |
|
30 |
-
# ====== FAISS 로드 ======
|
31 |
embedding_model = HuggingFaceEmbeddings(model_name='jhgan/ko-sroberta-multitask')
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
# ====== 모델 로드 (CPU 전용) ======
|
35 |
model_name = "kakaocorp/kanana-nano-2.1b-instruct"
|
@@ -43,6 +61,7 @@ custom_prompt = PromptTemplate(
|
|
43 |
input_variables=["context", "question"],
|
44 |
template="๋ค์ ๋ฌธ๋งฅ์ ์ฐธ๊ณ ํ์ฌ ์ง๋ฌธ์ ๋ตํ์ธ์.\n\n๋ฌธ๋งฅ:\n{context}\n\n์ง๋ฌธ:\n{question}\n\n๋ต๋ณ:"
|
45 |
)
|
|
|
46 |
qa_chain = RetrievalQA.from_chain_type(
|
47 |
llm=llm,
|
48 |
retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),
|
|
|
13 |
# ====== ZIP 자동 해제 ======
|
14 |
zip_path = "solo_leveling_faiss_ko.zip"
|
15 |
extract_dir = "solo_leveling_faiss_ko"
|
16 |
+
|
17 |
if os.path.exists(zip_path) and not os.path.exists(extract_dir):
|
18 |
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
|
19 |
zip_ref.extractall(extract_dir)
|
20 |
print(f"[INFO] ์์ถ ํด์ ์๋ฃ โ {extract_dir}")
|
21 |
|
22 |
+
# ====== TSV 데이터 로드 ======
|
23 |
df = pd.read_csv("sl_webtoon_full_data_sequential.tsv", sep="\t")
|
24 |
df['row_id'] = df.index
|
25 |
df['text'] = df.apply(
|
|
|
28 |
)
|
29 |
texts = df['text'].tolist()
|
30 |
|
31 |
+
# ====== FAISS 안전 로드 ======
|
32 |
embedding_model = HuggingFaceEmbeddings(model_name='jhgan/ko-sroberta-multitask')
|
33 |
+
|
34 |
+
possible_paths = [
|
35 |
+
extract_dir,
|
36 |
+
os.path.join(extract_dir, "solo_leveling_faiss_ko"),
|
37 |
+
os.path.join(extract_dir, "faiss_index")
|
38 |
+
]
|
39 |
+
|
40 |
+
load_path = None
|
41 |
+
for path in possible_paths:
|
42 |
+
if os.path.exists(os.path.join(path, "index.faiss")):
|
43 |
+
load_path = path
|
44 |
+
break
|
45 |
+
|
46 |
+
if load_path:
|
47 |
+
vectorstore = FAISS.load_local(load_path, embedding_model, allow_dangerous_deserialization=True)
|
48 |
+
print(f"[INFO] FAISS ์ธ๋ฑ์ค ๋ก๋ ์๋ฃ โ {load_path}")
|
49 |
+
else:
|
50 |
+
raise FileNotFoundError("FAISS index.faiss ํ์ผ์ ์ฐพ์ ์ ์์ต๋๋ค. ์์ถ ๊ตฌ์กฐ๋ฅผ ํ์ธํ์ธ์.")
|
51 |
|
52 |
# ====== 모델 로드 (CPU 전용) ======
|
53 |
model_name = "kakaocorp/kanana-nano-2.1b-instruct"
|
|
|
61 |
input_variables=["context", "question"],
|
62 |
template="๋ค์ ๋ฌธ๋งฅ์ ์ฐธ๊ณ ํ์ฌ ์ง๋ฌธ์ ๋ตํ์ธ์.\n\n๋ฌธ๋งฅ:\n{context}\n\n์ง๋ฌธ:\n{question}\n\n๋ต๋ณ:"
|
63 |
)
|
64 |
+
|
65 |
qa_chain = RetrievalQA.from_chain_type(
|
66 |
llm=llm,
|
67 |
retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),
|