tomas.helmfridsson committed on
Commit
bdb1db1
Β·
1 Parent(s): 41104fb
Files changed (1) hide show
  1. app.py +62 -23
app.py CHANGED
@@ -22,16 +22,22 @@ DEFAULT_TEMP = 0.3
22
  K = 10
23
 
24
  # ── LOGGING ─────────────────────────────────────────────────
25
- logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 
 
 
 
26
 
27
  # ── 1) Bygg eller ladda FAISS-index ─────────────────────────
28
  emb = HuggingFaceEmbeddings(model_name=EMB_MODEL)
29
  if os.path.isdir(INDEX_DIR):
30
- logging.info(f"πŸ”„ Laddar befintligt FAISS-index frΓ₯n `{INDEX_DIR}`…")
31
  vs = FAISS.load_local(INDEX_DIR, emb)
32
  else:
33
- logging.info("βš™οΈ Bygger nytt FAISS-index frΓ₯n PDF-filer…")
34
- splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
 
 
35
  all_docs, files = [], []
36
  for fn in os.listdir(DOCS_DIR):
37
  if fn.lower().endswith(".pdf"):
@@ -45,9 +51,10 @@ else:
45
  files.append(fn)
46
  vs = FAISS.from_documents(all_docs, emb)
47
  vs.save_local(INDEX_DIR)
48
- logging.info(f"βœ… Sparade index i `{INDEX_DIR}` ({len(files)} PDF, {len(all_docs)} chunkar)")
49
 
50
  # ── 2) Initiera LLM ──────────────────────────────────────────
 
51
  pipe = pipeline(
52
  "text-generation",
53
  model=LLM_MODEL,
@@ -56,9 +63,12 @@ pipe = pipeline(
56
  )
57
  llm = HuggingFacePipeline(
58
  pipeline=pipe,
59
- model_kwargs={"temperature": DEFAULT_TEMP, "max_new_tokens": MAX_NEW_TOKENS}
 
 
 
60
  )
61
- logging.info("βœ… LLM-pipeline initierad")
62
 
63
  # ── 3) Bygg RetrievalQA ─────────────────────────────────────
64
  retriever = vs.as_retriever(search_kwargs={"k": K})
@@ -68,48 +78,76 @@ qa_chain = RetrievalQA.from_chain_type(
68
  return_source_documents=False,
69
  chain_type="stuff"
70
  )
71
- logging.info(f"βœ… RetrievalQA klar (k={K})")
72
 
73
  # ── 4) Gradio-funktioner ────────────────────────────────────
74
- def ping():
 
75
  return "pong"
76
 
77
  def test_retrieval(query: str) -> str:
78
- hits = retriever.get_relevant_documents(query)
79
- if not hits:
 
80
  return "🚫 Inga trΓ€ffar"
81
  out = []
82
- for i, d in enumerate(hits, start=1):
83
  src = d.metadata.get("source", "okΓ€nd")
84
  snippet = d.page_content.replace("\n", " ")[:200]
85
  out.append(f"{i}. ({src}) …{snippet}…")
86
  return "\n\n".join(out)
87
 
88
- def chat_fn(query: str, temperature: float, history: list[dict]):
89
- # history Γ€r en lista av {role:..., content:...}
 
 
 
 
 
 
 
 
 
 
90
  history = history or []
 
 
 
 
 
 
 
91
  if not query.strip():
92
- history.append({"role": "assistant", "content": "⚠️ Du mΓ₯ste skriva en frΓ₯ga."})
 
 
93
  return history, history
94
 
95
  history.append({"role": "user", "content": query})
96
 
97
- # HΓ€mta top-K dokument och slΓ₯ ihop till prompt
98
  docs = retriever.get_relevant_documents(query)
99
  context = "\n\n---\n\n".join(d.page_content for d in docs)
100
  prompt = f"Kontekst:\n{context}\n\nFrΓ₯ga: {query}"
 
101
 
102
- # SΓ€tt temperatur
103
- llm.pipeline.model_kwargs["temperature"] = temperature
104
-
 
 
 
105
  try:
106
  svar = qa_chain.run(prompt)
107
  except Exception as e:
108
- svar = f"❌ Fel vid QA: {e}"
 
109
 
 
110
  src = docs[0].metadata.get("source", "okΓ€nd") if docs else "okΓ€nd"
111
  content = f"**(Dokument: {src})**\n\n{svar}"
112
  history.append({"role": "assistant", "content": content})
 
113
 
114
  return history, history
115
 
@@ -133,10 +171,11 @@ with gr.Blocks() as demo:
133
  chatbot = gr.Chatbot(type="messages", label="Chatt")
134
  chat_state = gr.State([])
135
 
136
- # OBS: alla click-anrop anvΓ€nder listor fΓΆr inputs/outputs
137
- ping_btn.click(fn=ping, inputs=[], outputs=[ping_out])
138
- test_btn.click(fn=test_retrieval, inputs=[txt], outputs=[test_out])
139
  send.click(fn=chat_fn, inputs=[txt, temp, chat_state], outputs=[chatbot, chat_state])
140
 
141
  if __name__ == "__main__":
 
142
  demo.launch(share=True)
 
 
22
  K = 10
23
 
24
  # ── LOGGING ─────────────────────────────────────────────────
25
+ logging.basicConfig(
26
+ level=logging.INFO,
27
+ format="%(asctime)s %(levelname)s %(message)s"
28
+ )
29
+ logger = logging.getLogger(__name__)
30
 
31
  # ── 1) Bygg eller ladda FAISS-index ─────────────────────────
32
  emb = HuggingFaceEmbeddings(model_name=EMB_MODEL)
33
  if os.path.isdir(INDEX_DIR):
34
+ logger.info(f"πŸ”„ Laddar befintligt FAISS-index frΓ₯n `{INDEX_DIR}`…")
35
  vs = FAISS.load_local(INDEX_DIR, emb)
36
  else:
37
+ logger.info("βš™οΈ Bygger nytt FAISS-index frΓ₯n PDF-filer…")
38
+ splitter = RecursiveCharacterTextSplitter(
39
+ chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
40
+ )
41
  all_docs, files = [], []
42
  for fn in os.listdir(DOCS_DIR):
43
  if fn.lower().endswith(".pdf"):
 
51
  files.append(fn)
52
  vs = FAISS.from_documents(all_docs, emb)
53
  vs.save_local(INDEX_DIR)
54
+ logger.info(f"βœ… Sparade index i `{INDEX_DIR}` ({len(files)} PDF, {len(all_docs)} chunkar)")
55
 
56
  # ── 2) Initiera LLM ──────────────────────────────────────────
57
+ logger.info("πŸš€ Initierar LLM-pipeline…")
58
  pipe = pipeline(
59
  "text-generation",
60
  model=LLM_MODEL,
 
63
  )
64
  llm = HuggingFacePipeline(
65
  pipeline=pipe,
66
+ model_kwargs={
67
+ "temperature": DEFAULT_TEMP,
68
+ "max_new_tokens": MAX_NEW_TOKENS
69
+ }
70
  )
71
+ logger.info("βœ… LLM-pipeline initierad")
72
 
73
  # ── 3) Bygg RetrievalQA ─────────────────────────────────────
74
  retriever = vs.as_retriever(search_kwargs={"k": K})
 
78
  return_source_documents=False,
79
  chain_type="stuff"
80
  )
81
+ logger.info(f"βœ… RetrievalQA klar (k={K})")
82
 
83
  # ── 4) Gradio-funktioner ────────────────────────────────────
84
def ping() -> str:
    """Liveness probe for the Gradio UI; always answers with the constant "pong"."""
    logger.debug("ping() anropad")
    return "pong"
87
 
88
def test_retrieval(query: str) -> str:
    """Run the retriever for *query* and format the hits for display.

    Args:
        query: free-text search string passed straight to the retriever.

    Returns:
        A double-newline-separated, numbered list of hits in the form
        "<n>. (<source>) …<first 200 chars>…", or a no-hit marker string.
    """
    # Lazy %-args: the f-string version formatted eagerly even when
    # DEBUG logging is disabled.
    logger.debug("test_retrieval() anropad med query=%r", query)
    docs = retriever.get_relevant_documents(query)
    if not docs:
        return "🚫 Inga trĂ€ffar"
    out = []
    for i, d in enumerate(docs, start=1):
        src = d.metadata.get("source", "okÀnd")
        # Flatten newlines so each snippet renders on one display line.
        snippet = d.page_content.replace("\n", " ")[:200]
        out.append(f"{i}. ({src}) …{snippet}…")
    return "\n\n".join(out)
99
 
100
def chat_fn(
    query: str,
    temperature: float,
    history: list[dict]
) -> tuple[list[dict], list[dict]]:
    """Answer *query* via the RetrievalQA chain and append to the chat history.

    Args:
        query: the user's question.
        temperature: sampling temperature forwarded to the LLM pipeline.
        history: prior chat messages as {"role": ..., "content": ...} dicts;
            may be None/empty on the first call.

    Returns:
        (updated_history, updated_history) — the same list twice, because
        Gradio wires it into both the Chatbot component and the State.
    """
    # Lazy %-args: avoid formatting cost when INFO logging is disabled
    # (the original used eager f-strings in every log call).
    logger.info(
        "chat_fn() anropad med query=%r, temp=%s, history_len=%s",
        query, temperature, len(history) if history else 0,
    )
    history = history or []

    # Defensive guard: Gradio should hand us a str, but fail loudly in-chat
    # rather than crash the callback.
    if not isinstance(query, str):
        msg = f"❌ Fel: query mĂ„ste vara str, fick {type(query)}"
        logger.error(msg)
        history.append({"role": "assistant", "content": msg})
        return history, history

    if not query.strip():
        msg = "⚠ Du mĂ„ste skriva en frĂ„ga."
        logger.warning(msg)
        history.append({"role": "assistant", "content": msg})
        return history, history

    history.append({"role": "user", "content": query})

    # Build the prompt from the top-K retrieved chunks.
    docs = retriever.get_relevant_documents(query)
    context = "\n\n---\n\n".join(d.page_content for d in docs)
    prompt = f"Kontekst:\n{context}\n\nFrÄga: {query}"
    logger.debug("Prompt till QA-kedjan: %s...", prompt[:200])

    # Best-effort temperature override. NOTE(review): mutating
    # pipeline.model_kwargs may not affect an already-built HF pipeline —
    # confirm it is re-read per generation call.
    try:
        llm.pipeline.model_kwargs["temperature"] = temperature
    except Exception:
        # The original bound `as e` but never used it.
        logger.exception("Kunde inte sÀtta temperature")

    try:
        svar = qa_chain.run(prompt)
    except Exception as e:
        # Surface the failure in-chat instead of crashing the UI.
        logger.exception("Fel vid anrop av qa_chain")
        svar = f"❌ Fel vid QA-kedjan: {e}"

    # Attribute the answer to the top-ranked source document (if any).
    src = docs[0].metadata.get("source", "okÀnd") if docs else "okÀnd"
    content = f"**(Dokument: {src})**\n\n{svar}"
    history.append({"role": "assistant", "content": content})
    logger.info(
        "chat_fn fÀrdig, svarslÀngd=%s tecken, totalt history=%s",
        len(svar), len(history),
    )

    return history, history
153
 
 
171
  chatbot = gr.Chatbot(type="messages", label="Chatt")
172
  chat_state = gr.State([])
173
 
174
+ ping_btn.click(fn=ping, inputs=[], outputs=[ping_out])
175
+ test_btn.click(fn=test_retrieval, inputs=[txt], outputs=[test_out])
 
176
  send.click(fn=chat_fn, inputs=[txt, temp, chat_state], outputs=[chatbot, chat_state])
177
 
178
  if __name__ == "__main__":
179
+ # share=True om du vill publikt dela lΓ€nken
180
  demo.launch(share=True)
181
+