Spaces:
Sleeping
Sleeping
tomas.helmfridsson
committed on
Commit
·
bdb1db1
1
Parent(s):
41104fb
update 41
Browse files
app.py
CHANGED
@@ -22,16 +22,22 @@ DEFAULT_TEMP = 0.3
|
|
22 |
K = 10
|
23 |
|
24 |
# ── LOGGING ─────────────────────────────────────────────────
|
25 |
-
logging.basicConfig(
|
|
|
|
|
|
|
|
|
26 |
|
27 |
# ── 1) Bygg eller ladda FAISS-index ─────────────────────────
|
28 |
emb = HuggingFaceEmbeddings(model_name=EMB_MODEL)
|
29 |
if os.path.isdir(INDEX_DIR):
|
30 |
-
|
31 |
vs = FAISS.load_local(INDEX_DIR, emb)
|
32 |
else:
|
33 |
-
|
34 |
-
splitter = RecursiveCharacterTextSplitter(
|
|
|
|
|
35 |
all_docs, files = [], []
|
36 |
for fn in os.listdir(DOCS_DIR):
|
37 |
if fn.lower().endswith(".pdf"):
|
@@ -45,9 +51,10 @@ else:
|
|
45 |
files.append(fn)
|
46 |
vs = FAISS.from_documents(all_docs, emb)
|
47 |
vs.save_local(INDEX_DIR)
|
48 |
-
|
49 |
|
50 |
# ── 2) Initiera LLM ─────────────────────────────────────────
|
|
|
51 |
pipe = pipeline(
|
52 |
"text-generation",
|
53 |
model=LLM_MODEL,
|
@@ -56,9 +63,12 @@ pipe = pipeline(
|
|
56 |
)
|
57 |
llm = HuggingFacePipeline(
|
58 |
pipeline=pipe,
|
59 |
-
model_kwargs={
|
|
|
|
|
|
|
60 |
)
|
61 |
-
|
62 |
|
63 |
# ── 3) Bygg RetrievalQA ─────────────────────────────────────
|
64 |
retriever = vs.as_retriever(search_kwargs={"k": K})
|
@@ -68,48 +78,76 @@ qa_chain = RetrievalQA.from_chain_type(
|
|
68 |
return_source_documents=False,
|
69 |
chain_type="stuff"
|
70 |
)
|
71 |
-
|
72 |
|
73 |
# ── 4) Gradio-funktioner ────────────────────────────────────
|
74 |
-
def ping():
|
|
|
75 |
return "pong"
|
76 |
|
77 |
def test_retrieval(query: str) -> str:
|
78 |
-
|
79 |
-
|
|
|
80 |
return "π« Inga trΓ€ffar"
|
81 |
out = []
|
82 |
-
for i, d in enumerate(
|
83 |
src = d.metadata.get("source", "okΓ€nd")
|
84 |
snippet = d.page_content.replace("\n", " ")[:200]
|
85 |
out.append(f"{i}. ({src}) β¦{snippet}β¦")
|
86 |
return "\n\n".join(out)
|
87 |
|
88 |
-
def chat_fn(
|
89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
history = history or []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
if not query.strip():
|
92 |
-
|
|
|
|
|
93 |
return history, history
|
94 |
|
95 |
history.append({"role": "user", "content": query})
|
96 |
|
97 |
-
#
|
98 |
docs = retriever.get_relevant_documents(query)
|
99 |
context = "\n\n---\n\n".join(d.page_content for d in docs)
|
100 |
prompt = f"Kontekst:\n{context}\n\nFrΓ₯ga: {query}"
|
|
|
101 |
|
102 |
-
#
|
103 |
-
|
104 |
-
|
|
|
|
|
|
|
105 |
try:
|
106 |
svar = qa_chain.run(prompt)
|
107 |
except Exception as e:
|
108 |
-
|
|
|
109 |
|
|
|
110 |
src = docs[0].metadata.get("source", "okΓ€nd") if docs else "okΓ€nd"
|
111 |
content = f"**(Dokument: {src})**\n\n{svar}"
|
112 |
history.append({"role": "assistant", "content": content})
|
|
|
113 |
|
114 |
return history, history
|
115 |
|
@@ -133,10 +171,11 @@ with gr.Blocks() as demo:
|
|
133 |
chatbot = gr.Chatbot(type="messages", label="Chatt")
|
134 |
chat_state = gr.State([])
|
135 |
|
136 |
-
|
137 |
-
|
138 |
-
test_btn.click(fn=test_retrieval, inputs=[txt], outputs=[test_out])
|
139 |
send.click(fn=chat_fn, inputs=[txt, temp, chat_state], outputs=[chatbot, chat_state])
|
140 |
|
141 |
if __name__ == "__main__":
|
|
|
142 |
demo.launch(share=True)
|
|
|
|
22 |
K = 10
|
23 |
|
24 |
# ── LOGGING ─────────────────────────────────────────────────
|
25 |
+
# Configure the root logger at import time: INFO level, with timestamp,
# level name and message on every record.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
)
# Module-level logger used by the setup code and the Gradio handlers below.
logger = logging.getLogger(__name__)
|
30 |
|
31 |
# ── 1) Bygg eller ladda FAISS-index ─────────────────────────
|
32 |
emb = HuggingFaceEmbeddings(model_name=EMB_MODEL)
|
33 |
if os.path.isdir(INDEX_DIR):
|
34 |
+
logger.info(f"π Laddar befintligt FAISS-index frΓ₯n `{INDEX_DIR}`β¦")
|
35 |
vs = FAISS.load_local(INDEX_DIR, emb)
|
36 |
else:
|
37 |
+
logger.info("βοΈ Bygger nytt FAISS-index frΓ₯n PDF-filerβ¦")
|
38 |
+
splitter = RecursiveCharacterTextSplitter(
|
39 |
+
chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
|
40 |
+
)
|
41 |
all_docs, files = [], []
|
42 |
for fn in os.listdir(DOCS_DIR):
|
43 |
if fn.lower().endswith(".pdf"):
|
|
|
51 |
files.append(fn)
|
52 |
vs = FAISS.from_documents(all_docs, emb)
|
53 |
vs.save_local(INDEX_DIR)
|
54 |
+
logger.info(f"β
Sparade index i `{INDEX_DIR}` ({len(files)} PDF, {len(all_docs)} chunkar)")
|
55 |
|
56 |
# ── 2) Initiera LLM ─────────────────────────────────────────
|
57 |
+
logger.info("π Initierar LLM-pipelineβ¦")
|
58 |
pipe = pipeline(
|
59 |
"text-generation",
|
60 |
model=LLM_MODEL,
|
|
|
63 |
)
|
64 |
llm = HuggingFacePipeline(
|
65 |
pipeline=pipe,
|
66 |
+
model_kwargs={
|
67 |
+
"temperature": DEFAULT_TEMP,
|
68 |
+
"max_new_tokens": MAX_NEW_TOKENS
|
69 |
+
}
|
70 |
)
|
71 |
+
logger.info("β
LLM-pipeline initierad")
|
72 |
|
73 |
# ── 3) Bygg RetrievalQA ─────────────────────────────────────
|
74 |
retriever = vs.as_retriever(search_kwargs={"k": K})
|
|
|
78 |
return_source_documents=False,
|
79 |
chain_type="stuff"
|
80 |
)
|
81 |
+
logger.info(f"β
RetrievalQA klar (k={K})")
|
82 |
|
83 |
# ── 4) Gradio-funktioner ────────────────────────────────────
|
84 |
+
def ping() -> str:
    """Health-check handler: log the call at DEBUG level and return ``"pong"``."""
    logger.debug("ping() anropad")
    return "pong"
|
87 |
|
88 |
def test_retrieval(query: str) -> str:
    """Return a numbered preview of the documents retrieved for ``query``.

    Each hit is rendered as ``"<n>. (<source>) …<snippet>…"``, where the
    snippet is the first 200 characters of the chunk with newlines replaced
    by spaces. Hits are numbered from 1 and separated by a blank line.

    Args:
        query: Search text passed unchanged to the module-level retriever.

    Returns:
        The formatted hit list, or a fixed "no hits" message when the
        retriever returns nothing.
    """
    # Lazy %-args: the repr is only built if DEBUG logging is enabled.
    logger.debug("test_retrieval() anropad med query=%r", query)
    docs = retriever.get_relevant_documents(query)
    if not docs:
        return "🚫 Inga träffar"

    out = []
    for i, d in enumerate(docs, start=1):
        src = d.metadata.get("source", "okänd")
        # Flatten newlines first so the 200-character cut is a single line.
        snippet = d.page_content.replace("\n", " ")[:200]
        out.append(f"{i}. ({src}) …{snippet}…")
    return "\n\n".join(out)
|
99 |
|
100 |
+
def chat_fn(
    query: str,
    temperature: float,
    history: list[dict],
) -> tuple[list[dict], list[dict]]:
    """Answer ``query`` through the QA chain and append the exchange to ``history``.

    Invalid or empty input, and failures inside the QA chain, are reported
    as assistant messages in the chat instead of being raised.

    Args:
        query: The user's question.
        temperature: Sampling temperature requested for the LLM.
        history: Earlier chat messages as ``{"role", "content"}`` dicts; a
            falsy value is treated as an empty history.

    Returns:
        The updated history twice, one copy for each of the two outputs the
        click handler is wired to.
    """
    history = history or []
    logger.info(
        "chat_fn() anropad med query=%r, temp=%s, history_len=%d",
        query,
        temperature,
        len(history),
    )

    # NOTE(review): the leading symbol of the two "Fel" messages in this
    # function was unreadable in the extracted text; "❌" is a reconstruction
    # -- confirm against the repository.
    if not isinstance(query, str):
        msg = f"❌ Fel: query måste vara str, fick {type(query)}"
        logger.error(msg)
        history.append({"role": "assistant", "content": msg})
        return history, history

    if not query.strip():
        msg = "⚠️ Du måste skriva en fråga."
        logger.warning(msg)
        history.append({"role": "assistant", "content": msg})
        return history, history

    history.append({"role": "user", "content": query})

    # Build the prompt from the retrieved chunks.
    docs = retriever.get_relevant_documents(query)
    context = "\n\n---\n\n".join(d.page_content for d in docs)
    prompt = f"Kontekst:\n{context}\n\nFråga: {query}"
    logger.debug("Prompt till QA-kedjan: %s...", prompt[:200])

    # Best-effort temperature override; a failure here must not block the answer.
    # NOTE(review): assumes the wrapped pipeline object exposes a mutable
    # ``model_kwargs`` mapping. If it does not, this raises on every call, is
    # logged below, and the requested temperature is silently not applied --
    # confirm which attribute the LLM wrapper actually reads.
    try:
        llm.pipeline.model_kwargs["temperature"] = temperature
    except Exception:
        logger.exception("Kunde inte sätta temperature")

    # NOTE(review): ``prompt`` already embeds the retrieved context, and
    # ``qa_chain`` is a RetrievalQA chain, so it presumably retrieves again
    # using this longer string as the question -- confirm this is intended.
    try:
        svar = qa_chain.run(prompt)
    except Exception as e:
        logger.exception("Fel vid anrop av qa_chain")
        svar = f"❌ Fel vid QA-kedjan: {e}"

    # Attribute the answer to the first retrieved document, if there is one.
    src = docs[0].metadata.get("source", "okänd") if docs else "okänd"
    content = f"**(Dokument: {src})**\n\n{svar}"
    history.append({"role": "assistant", "content": content})
    logger.info(
        "chat_fn färdig, svarslängd=%d tecken, totalt history=%d",
        len(svar),
        len(history),
    )

    return history, history
|
153 |
|
|
|
171 |
chatbot = gr.Chatbot(type="messages", label="Chatt")
|
172 |
chat_state = gr.State([])
|
173 |
|
174 |
+
ping_btn.click(fn=ping, inputs=[], outputs=[ping_out])
|
175 |
+
test_btn.click(fn=test_retrieval, inputs=[txt], outputs=[test_out])
|
|
|
176 |
send.click(fn=chat_fn, inputs=[txt, temp, chat_state], outputs=[chatbot, chat_state])
|
177 |
|
178 |
if __name__ == "__main__":
|
179 |
+
# share=True om du vill publikt dela länken
|
180 |
demo.launch(share=True)
|
181 |
+
|