min24ss committed
Commit ee55b6f · verified · 1 Parent(s): a7f3205

Update app.py

Files changed (1):
  1. app.py +117 -97
app.py CHANGED
@@ -1,118 +1,138 @@
- import os
- import zipfile
- import pandas as pd
  import gradio as gr
  from langchain_community.vectorstores import FAISS
  from langchain_community.embeddings import HuggingFaceEmbeddings
- from langchain.chains import RetrievalQA
- from langchain.prompts import PromptTemplate
- from langchain_community.llms import HuggingFacePipeline
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
  import torch
-
- # ====== Auto-extract the ZIP ======
- zip_path = "solo_leveling_faiss_ko.zip"
- extract_dir = "solo_leveling_faiss_ko"
-
- if os.path.exists(zip_path) and not os.path.exists(extract_dir):
-     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
-         zip_ref.extractall(extract_dir)
-     print(f"[INFO] Extraction complete → {extract_dir}")
-
- # ====== Load the TSV data ======
- df = pd.read_csv("sl_webtoon_full_data_sequential.tsv", sep="\t")
- df['row_id'] = df.index
- df['text'] = df.apply(
-     lambda x: f"[{x['에피소드']}] #{x['row_id']} {x['type']} {x['scene_text']}",  # '에피소드' = episode column
-     axis=1
- )
-
- # ====== Safe FAISS load ======
- embedding_model = HuggingFaceEmbeddings(model_name='jhgan/ko-sroberta-multitask')
-
- possible_paths = [
-     extract_dir,
-     os.path.join(extract_dir, "solo_leveling_faiss_ko"),
-     os.path.join(extract_dir, "faiss_index")
- ]
-
- load_path = None
- for path in possible_paths:
-     if os.path.exists(os.path.join(path, "index.faiss")):
-         load_path = path
-         break
-
- if load_path:
-     vectorstore = FAISS.load_local(load_path, embedding_model, allow_dangerous_deserialization=True)
-     print(f"[INFO] FAISS index loaded → {load_path}")
- else:
-     raise FileNotFoundError("Could not find the FAISS index.faiss file.")
-
- # ====== Load the model (CPU only) ======
  model_name = "kakaocorp/kanana-nano-2.1b-instruct"
  tokenizer = AutoTokenizer.from_pretrained(model_name)
- model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32).to("cpu")
- llm_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=200)
- llm = HuggingFacePipeline(pipeline=llm_pipeline)
-
- # ====== QA chain ======
- custom_prompt = PromptTemplate(
-     input_variables=["context", "question"],
-     template="Answer the question with reference to the following context.\n\nContext:\n{context}\n\nQuestion:\n{question}\n\nAnswer:"
  )

- qa_chain = RetrievalQA.from_chain_type(
-     llm=llm,
-     retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),
-     chain_type="stuff",
-     return_source_documents=True,
-     chain_type_kwargs={"prompt": custom_prompt}
  )

- # ====== Chat response function ======
  choices = [
-     "1. Kill all of Hwang Dongsuk's group.",
-     "2. Kill all of Hwang Dongsuk's group, including Jinho.",
-     "3. Knock them all out and let them live.",
-     "4. Refuse the system and just run away."
  ]

- def respond(message, history):
      try:
-         sel_num = int(message.strip())
-         if sel_num < 1 or sel_num > len(choices):
-             return gr.ChatMessage(role="assistant", content="❌ Please enter a valid number. (1-4)")
-     except ValueError:
-         return gr.ChatMessage(role="assistant", content="❌ Please enter a number. (e.g. 1, 2, 3, 4)")
-
-     user_choice = choices[sel_num - 1]
-     result = qa_chain({"query": user_choice})
-     retrieved_context = "\n".join([doc.page_content for doc in result["source_documents"]])
-
      prompt = f"""
- You are Sung Jinwoo from the webtoon 'Solo Leveling'.
- Current situation:
- {retrieved_context}
- User's choice: {user_choice}
- Write one or two short, natural sentences of dialogue in Sung Jinwoo's voice.
- Do not produce duplicated or near-identical sentences.
- """
-
-     response = llm_pipeline(prompt)[0]["generated_text"]
-
-     # User message (right side)
-     user_msg = gr.ChatMessage(role="user", content=f"Choice {sel_num} ({user_choice})")
-     # Sung Jinwoo's message (left side)
-     sjw_msg = gr.ChatMessage(role="assistant", content=response)
-
-     return [user_msg, sjw_msg]
-
-
  demo = gr.ChatInterface(
-     respond,
-     title="Sung Jinwoo choice simulation (KakaoTalk style, Sung Jinwoo on the left)",
-     description="Enter one of the numbers 1-4 and Sung Jinwoo's response appears in chat form."
  )

  if __name__ == "__main__":
      demo.launch()
 
+ import os, zipfile, shutil, glob
  import gradio as gr
  from langchain_community.vectorstores import FAISS
  from langchain_community.embeddings import HuggingFaceEmbeddings
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
  import torch
+ import langchain
+
+ ZIP_NAME = "solo_leveling_faiss_ko.zip"
+ TARGET_DIR = "solo_leveling_faiss_ko"
+
+ def ensure_faiss_dir() -> str:
+     """Ensure the FAISS index is in a loadable location, wherever it is."""
+     if os.path.exists(os.path.join(TARGET_DIR, "index.faiss")) and \
+        os.path.exists(os.path.join(TARGET_DIR, "index.pkl")):
+         return TARGET_DIR
+
+     # Index files sitting in the repository root: move them into place.
+     if os.path.exists("index.faiss") and os.path.exists("index.pkl"):
+         os.makedirs(TARGET_DIR, exist_ok=True)
+         if not os.path.exists(os.path.join(TARGET_DIR, "index.faiss")):
+             shutil.move("index.faiss", os.path.join(TARGET_DIR, "index.faiss"))
+         if not os.path.exists(os.path.join(TARGET_DIR, "index.pkl")):
+             shutil.move("index.pkl", os.path.join(TARGET_DIR, "index.pkl"))
+         return TARGET_DIR
+
+     # Last resort: extract the ZIP and search for the index files.
+     if os.path.exists(ZIP_NAME):
+         with zipfile.ZipFile(ZIP_NAME, 'r') as z:
+             z.extractall(".")
+         if os.path.exists(os.path.join(TARGET_DIR, "index.faiss")) and \
+            os.path.exists(os.path.join(TARGET_DIR, "index.pkl")):
+             return TARGET_DIR
+         faiss_cand = glob.glob("**/index.faiss", recursive=True)
+         pkl_cand = glob.glob("**/index.pkl", recursive=True)
+         if faiss_cand and pkl_cand:
+             os.makedirs(TARGET_DIR, exist_ok=True)
+             shutil.copy2(faiss_cand[0], os.path.join(TARGET_DIR, "index.faiss"))
+             shutil.copy2(pkl_cand[0], os.path.join(TARGET_DIR, "index.pkl"))
+             return TARGET_DIR
+
+     raise FileNotFoundError("FAISS index files not found (index.faiss / index.pkl).")
+
+ # 0) Locate the FAISS index
+ base_dir = ensure_faiss_dir()
+
+ # 1) Vector DB
+ embeddings = HuggingFaceEmbeddings(model_name="jhgan/ko-sroberta-multitask")
+ vectorstore = FAISS.load_local(base_dir, embeddings, allow_dangerous_deserialization=True)
+
+ # 2) Model loading (safe options for a CPU-only environment)
  model_name = "kakaocorp/kanana-nano-2.1b-instruct"
  tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     torch_dtype=torch.float32,
+     device_map=None  # keep the whole model on CPU
  )

+ # 3) Text-generation pipeline
+ pipe = pipeline(
+     "text-generation",
+     model=model,
+     tokenizer=tokenizer,
+     max_new_tokens=100,
+     temperature=0.6,
+     do_sample=True,
+     top_p=0.9,
+     return_full_text=False
  )
+ lm = pipe  # alias used in rag_answer below

+ # Choices
  choices = [
+     "1: Kill all of Hwang Dongsuk's group.",
+     "2: Kill all of Hwang Dongsuk's group, including Jinho.",
+     "3: Knock them all out and let them live.",
+     "4: Refuse the system and just run away."
  ]

+ # RAG + dialogue generation function
+ def rag_answer(message, history):
      try:
+         user_idx = int(message.strip()) - 1
+         if not 0 <= user_idx < len(choices):  # reject 0, negatives, and numbers past 4
+             raise ValueError(message)
+         user_choice = choices[user_idx]
+     except ValueError:
+         return "❗ Please enter a valid number. (e.g. 1-4)"
+
+     # FAISS retrieval
+     docs = vectorstore.similarity_search(user_choice, k=3)
+     context = "\n".join([doc.page_content for doc in docs])
      prompt = f"""
+ You are Sung Jinwoo from the webtoon 'Solo Leveling'.
+ Current situation:
+ {context}
+ User's choice: {user_choice}
+ Write one or two short, natural sentences of dialogue in Sung Jinwoo's voice.
+ Do not produce duplicated or near-identical sentences.
+ """
+     response = lm(prompt)[0]["generated_text"]
+     only_dialogue = response.strip().split("\n")[-1]
+
+     # Prevent a duplicated "Line:" prefix
+     if not only_dialogue.startswith("Line:"):
+         only_dialogue = "Line: " + only_dialogue
+
+     return only_dialogue
+
+ # Background image CSS
+ css_code = """
+ body {
+     background-image: url('https://huggingface.co/spaces/min24ss/r-story-test/resolve/main/jinwoo.png');
+     background-size: cover;
+     background-position: center;
+ }
+ """
+
+ # Gradio UI
  demo = gr.ChatInterface(
+     fn=rag_answer,
+     title="[Emergency Quest: Kill the Enemies!]",
+     description=(
+         "Those with murderous intent toward the 'Player' are nearby. Kill them all to secure your safety.<br>"
+         "If you do not follow the instructions, your heart will stop(!).<br>"
+         "Enemies to kill: 8 / Enemies killed: 0<br><br>"
+         "💬 Enter a choice:<br>"
+         "1: Kill all of Hwang Dongsuk's group.<br>"
+         "2: Kill all of Hwang Dongsuk's group, including Jinho.<br>"
+         "3: Knock them all out and let them live.<br>"
+         "4: Refuse the system and just run away."
+     ),
+     css=css_code
  )

+ # Run
  if __name__ == "__main__":
+     print("Torch:", torch.__version__)
+     print("Transformers:", __import__('transformers').__version__)
+     print("LangChain:", langchain.__version__)
      demo.launch()
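
For reference, a minimal sketch of how an index compatible with the FAISS.load_local call above might be built offline, assuming the TSV layout used by the previous version of this script; build_index.py is a hypothetical helper, not part of this commit.

# build_index.py - hypothetical offline helper, not part of this commit.
# Builds the solo_leveling_faiss_ko/ folder (index.faiss + index.pkl)
# that app.py expects, using the same Korean sentence-embedding model.
import pandas as pd
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

df = pd.read_csv("sl_webtoon_full_data_sequential.tsv", sep="\t")
df["row_id"] = df.index
# '에피소드' is the episode column of the TSV.
texts = df.apply(
    lambda x: f"[{x['에피소드']}] #{x['row_id']} {x['type']} {x['scene_text']}",
    axis=1,
).tolist()

embeddings = HuggingFaceEmbeddings(model_name="jhgan/ko-sroberta-multitask")
index = FAISS.from_texts(texts, embeddings)
index.save_local("solo_leveling_faiss_ko")  # writes index.faiss and index.pkl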
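
And a possible smoke test for the retrieval-plus-generation path; importing app runs its module-level setup (index load, model download), so this assumes those assets are available. Purely illustrative.

# smoke_test.py - hypothetical check, not part of this commit.
import app  # module-level setup runs here: FAISS load + model load

# Exercise the same entry point gr.ChatInterface calls,
# with one valid and two invalid inputs.
for message in ["2", "0", "abc"]:
    print(f">> {message}")
    print(app.rag_answer(message, history=[]))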