Spaces:

min24ss
/

r-story-selection

Sleeping

App Files Files Community

min24ss committed 20 days ago

Commit

f5e3afe

verified ·

1 Parent(s): a7ada72

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -3

app.py CHANGED Viewed

@@ -13,12 +13,13 @@ import torch
 # ====== ZIP 자동 해제 ======
 zip_path = "solo_leveling_faiss_ko.zip"
 extract_dir = "solo_leveling_faiss_ko"
 if os.path.exists(zip_path) and not os.path.exists(extract_dir):
     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
         zip_ref.extractall(extract_dir)
     print(f"[INFO] 압축 해제 완료 → {extract_dir}")
-# ====== 데이터 로드 ======
 df = pd.read_csv("sl_webtoon_full_data_sequential.tsv", sep="\t")
 df['row_id'] = df.index
 df['text'] = df.apply(
@@ -27,9 +28,26 @@ df['text'] = df.apply(
 )
 texts = df['text'].tolist()
-# ====== FAISS 로드 ======
 embedding_model = HuggingFaceEmbeddings(model_name='jhgan/ko-sroberta-multitask')
-vectorstore = FAISS.load_local(extract_dir, embedding_model, allow_dangerous_deserialization=True)
 # ====== 모델 로드 (CPU 전용) ======
 model_name = "kakaocorp/kanana-nano-2.1b-instruct"
@@ -43,6 +61,7 @@ custom_prompt = PromptTemplate(
     input_variables=["context", "question"],
     template="다음 문맥을 참고하여 질문에 답하세요.\n\n문맥:\n{context}\n\n질문:\n{question}\n\n답변:"
 )
 qa_chain = RetrievalQA.from_chain_type(
     llm=llm,
     retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),

 # ====== ZIP 자동 해제 ======
 zip_path = "solo_leveling_faiss_ko.zip"
 extract_dir = "solo_leveling_faiss_ko"
 if os.path.exists(zip_path) and not os.path.exists(extract_dir):
     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
         zip_ref.extractall(extract_dir)
     print(f"[INFO] 압축 해제 완료 → {extract_dir}")
+# ====== TSV 데이터 로드 ======
 df = pd.read_csv("sl_webtoon_full_data_sequential.tsv", sep="\t")
 df['row_id'] = df.index
 df['text'] = df.apply(
 )
 texts = df['text'].tolist()
+# ====== FAISS 안전 로드 ======
 embedding_model = HuggingFaceEmbeddings(model_name='jhgan/ko-sroberta-multitask')
+possible_paths = [
+    extract_dir,
+    os.path.join(extract_dir, "solo_leveling_faiss_ko"),
+    os.path.join(extract_dir, "faiss_index")
+]
+load_path = None
+for path in possible_paths:
+    if os.path.exists(os.path.join(path, "index.faiss")):
+        load_path = path
+        break
+if load_path:
+    vectorstore = FAISS.load_local(load_path, embedding_model, allow_dangerous_deserialization=True)
+    print(f"[INFO] FAISS 인덱스 로드 완료 → {load_path}")
+else:
+    raise FileNotFoundError("FAISS index.faiss 파일을 찾을 수 없습니다. 압축 구조를 확인하세요.")
 # ====== 모델 로드 (CPU 전용) ======
 model_name = "kakaocorp/kanana-nano-2.1b-instruct"
     input_variables=["context", "question"],
     template="다음 문맥을 참고하여 질문에 답하세요.\n\n문맥:\n{context}\n\n질문:\n{question}\n\n답변:"
 )
 qa_chain = RetrievalQA.from_chain_type(
     llm=llm,
     retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),